diff --git a/bolt/include/bolt/Profile/Heatmap.h b/bolt/include/bolt/Profile/Heatmap.h index 9813e7fed486d..fc1e2cd30011e 100644 --- a/bolt/include/bolt/Profile/Heatmap.h +++ b/bolt/include/bolt/Profile/Heatmap.h @@ -52,9 +52,6 @@ class Heatmap { : BucketSize(BucketSize), MinAddress(MinAddress), MaxAddress(MaxAddress), TextSections(TextSections) {} - uint64_t HotStart{0}; - uint64_t HotEnd{0}; - inline bool ignoreAddress(uint64_t Address) const { return (Address > MaxAddress) || (Address < MinAddress); } diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 6beb60741406e..c7db9d262e942 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -1316,14 +1316,6 @@ std::error_code DataAggregator::printLBRHeatMap() { } Heatmap HM(opts::HeatmapBlock, opts::HeatmapMinAddress, opts::HeatmapMaxAddress, getTextSections(BC)); - auto getSymbolValue = [&](const MCSymbol *Symbol) -> uint64_t { - if (Symbol) - if (ErrorOr SymValue = BC->getSymbolValue(*Symbol)) - return SymValue.get(); - return 0; - }; - HM.HotStart = getSymbolValue(BC->getHotTextStartSymbol()); - HM.HotEnd = getSymbolValue(BC->getHotTextEndSymbol()); if (!NumTotalSamples) { if (opts::BasicAggregation) { diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp index c66c2e5487613..003db3cc61137 100644 --- a/bolt/lib/Profile/Heatmap.cpp +++ b/bolt/lib/Profile/Heatmap.cpp @@ -8,7 +8,6 @@ #include "bolt/Profile/Heatmap.h" #include "bolt/Utils/CommandLineOpts.h" -#include "llvm/ADT/AddressRanges.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" @@ -314,9 +313,6 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const { UnmappedHotness += Frequency; }; - AddressRange HotTextRange(HotStart, HotEnd); - StringRef HotTextName = "[hot text]"; - for (const std::pair &KV : Map) { NumTotalCounts += KV.second; // We map an address bucket to the first section (lowest address) @@ -332,24 
+328,15 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const { } SectionHotness[TextSections[TextSectionIndex].Name] += KV.second; ++BucketUtilization[TextSections[TextSectionIndex].Name]; - if (HotTextRange.contains(Address)) { - SectionHotness[HotTextName] += KV.second; - ++BucketUtilization[HotTextName]; - } } - std::vector Sections(TextSections); - // Append synthetic hot text section to TextSections - if (!HotTextRange.empty()) - Sections.emplace_back(SectionNameAndRange{HotTextName, HotStart, HotEnd}); - assert(NumTotalCounts > 0 && "total number of heatmap buckets should be greater than 0"); OS << "Section Name, Begin Address, End Address, Percentage Hotness, " << "Utilization Pct, Partition Score\n"; const uint64_t MappedCounts = NumTotalCounts - UnmappedHotness; - for (const auto [Name, Begin, End] : Sections) { + for (const auto [Name, Begin, End] : TextSections) { const float Hotness = 1. * SectionHotness[Name] / NumTotalCounts; const float MappedHotness = MappedCounts ? 1. 
* SectionHotness[Name] / MappedCounts : 0; diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index dd519431fb2e3..614938d0e3b65 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -968,9 +968,8 @@ void RewriteInstance::discoverFileObjects() { continue; } - // Ignore input hot markers unless in heatmap mode - if ((SymName == "__hot_start" || SymName == "__hot_end") && - !opts::HeatmapMode) + // Ignore input hot markers + if (SymName == "__hot_start" || SymName == "__hot_end") continue; FileSymRefs.emplace(SymbolAddress, Symbol); diff --git a/bolt/test/X86/callcont-fallthru.s b/bolt/test/X86/callcont-fallthru.s index 4994cfb541eef..44e3bf21c14c0 100644 --- a/bolt/test/X86/callcont-fallthru.s +++ b/bolt/test/X86/callcont-fallthru.s @@ -6,7 +6,7 @@ # RUN: %clangxx %cxxflags %s %t.so -o %t -Wl,-q -nostdlib # RUN: link_fdata %s %t %t.pat PREAGGT1 # RUN: link_fdata %s %t %t.pat2 PREAGGT2 -# RUN-DISABLED: link_fdata %s %t %t.patplt PREAGGPLT +# RUN: link_fdata %s %t %t.patplt PREAGGPLT # RUN: llvm-strip --strip-unneeded %t -o %t.strip # RUN: llvm-objcopy --remove-section=.eh_frame %t.strip %t.noeh @@ -26,8 +26,8 @@ ## Check pre-aggregated traces don't report zero-sized PLT fall-through as ## invalid trace -# RUN-DISABLED: llvm-bolt %t.strip --pa -p %t.patplt -o %t.out | FileCheck %s \ -# RUN-DISABLED: --check-prefix=CHECK-PLT +# RUN: llvm-bolt %t.strip --pa -p %t.patplt -o %t.out | FileCheck %s \ +# RUN: --check-prefix=CHECK-PLT # CHECK-PLT: traces mismatching disassembled function contents: 0 .globl foo diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test index 306e74800a353..702dc804f5133 100644 --- a/bolt/test/X86/heatmap-preagg.test +++ b/bolt/test/X86/heatmap-preagg.test @@ -13,7 +13,6 @@ RUN: --reorder-functions=cdsort --enable-bat --dyno-stats --skip-funcs=main RUN: llvm-bolt-heatmap %t.out -o %t2 --pa -p %p/Inputs/blarge_new_bat.preagg.txt \ RUN: 2>&1 | 
FileCheck --check-prefix CHECK-HEATMAP-BAT %s RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotness.csv -RUN: llvm-nm -n %t.out | FileCheck %s --check-prefix=CHECK-HOT-SYMS CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries CHECK-HEATMAP: HEATMAP: invalid traces: 1 @@ -34,6 +33,3 @@ CHECK-SEC-HOT-BAT-NEXT: .bolt.org.text, 0x4010b0, 0x401c25, 38.3385, 51.0638, 0. CHECK-SEC-HOT-BAT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000 CHECK-SEC-HOT-BAT-NEXT: .text, 0x800000, 0x8002cc, 38.7595, 91.6667, 0.3553 CHECK-SEC-HOT-BAT-NEXT: .text.cold, 0x800300, 0x800415, 0.0000, 0.0000, 0.0000 -CHECK-SEC-HOT-BAT-NEXT: [hot text], 0x800000, 0x8002cc, 38.7595, 91.6667, 0.3553 -CHECK-HOT-SYMS: 800000 W __hot_start -CHECK-HOT-SYMS: 8002cc W __hot_end diff --git a/clang/docs/CMakeLists.txt b/clang/docs/CMakeLists.txt index 1f06c040c96cb..ca625efc6ccef 100644 --- a/clang/docs/CMakeLists.txt +++ b/clang/docs/CMakeLists.txt @@ -134,34 +134,6 @@ if (LLVM_ENABLE_SPHINX) gen_rst_file_from_td(DiagnosticsReference.rst -gen-diag-docs ../include/clang/Basic/Diagnostic.td "${docs_targets}") gen_rst_file_from_td(ClangCommandLineReference.rst -gen-opt-docs ../include/clang/Driver/ClangOptionDocs.td "${docs_targets}") - # Another generated file from a different source - set(docs_tools_dir ${CMAKE_CURRENT_SOURCE_DIR}/tools) - set(aopts_rst_rel_path analyzer/user-docs/Options.rst) - set(aopts_rst "${CMAKE_CURRENT_BINARY_DIR}/${aopts_rst_rel_path}") - set(analyzeroptions_def "${CMAKE_CURRENT_SOURCE_DIR}/../include/clang/StaticAnalyzer/Core/AnalyzerOptions.def") - set(aopts_rst_in "${CMAKE_CURRENT_SOURCE_DIR}/${aopts_rst_rel_path}.in") - add_custom_command( - OUTPUT ${aopts_rst} - COMMAND ${Python3_EXECUTABLE} generate_analyzer_options_docs.py - --options-def "${analyzeroptions_def}" - --template "${aopts_rst_in}" - --out "${aopts_rst}" - WORKING_DIRECTORY ${docs_tools_dir} - VERBATIM - COMMENT "Generating ${aopts_rst}" - DEPENDS 
${docs_tools_dir}/${generate_aopts_docs} - ${aopts_rst_in} - copy-clang-rst-docs - ) - add_custom_target(generate-analyzer-options-rst DEPENDS ${aopts_rst}) - foreach(target ${docs_targets}) - add_dependencies(${target} generate-analyzer-options-rst) - endforeach() - - # Technically this is redundant because generate-analyzer-options-rst - # depends on the copy operation (because it wants to drop a generated file - # into a subdirectory of the copied tree), but I'm leaving it here for the - # sake of clarity. foreach(target ${docs_targets}) add_dependencies(${target} copy-clang-rst-docs) endforeach() diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 985cf264876e9..5748339015906 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -520,12 +520,12 @@ Improvements to Clang's diagnostics - Several compatibility diagnostics that were incorrectly being grouped under ``-Wpre-c++20-compat`` are now part of ``-Wc++20-compat``. (#GH138775) -- Improved the ``-Wtautological-overlap-compare`` diagnostics to warn about overlapping and non-overlapping ranges involving character literals and floating-point literals. +- Improved the ``-Wtautological-overlap-compare`` diagnostics to warn about overlapping and non-overlapping ranges involving character literals and floating-point literals. The warning message for non-overlapping cases has also been improved (#GH13473). - Fixed a duplicate diagnostic when performing typo correction on function template calls with explicit template arguments. (#GH139226) - + - Explanatory note is printed when ``assert`` fails during evaluation of a constant expression. Prior to this, the error inaccurately implied that assert could not be used at all in a constant expression (#GH130458) @@ -533,23 +533,6 @@ Improvements to Clang's diagnostics - A new off-by-default warning ``-Wms-bitfield-padding`` has been added to alert to cases where bit-field packing may differ under the MS struct ABI (#GH117428). 
-- ``-Watomic-access`` no longer fires on unreachable code. e.g., - - .. code-block:: c - - _Atomic struct S { int a; } s; - void func(void) { - if (0) - s.a = 12; // Previously diagnosed with -Watomic-access, now silenced - s.a = 12; // Still diagnosed with -Watomic-access - return; - s.a = 12; // Previously diagnosed, now silenced - } - - -- A new ``-Wcharacter-conversion`` warns where comparing or implicitly converting - between different Unicode character types (``char8_t``, ``char16_t``, ``char32_t``). - This warning only triggers in C++ as these types are aliases in C. (#GH138526) Improvements to Clang's time-trace ---------------------------------- @@ -614,7 +597,6 @@ Bug Fixes in This Version - Fixed a crash with an invalid member function parameter list with a default argument which contains a pragma. (#GH113722) - Fixed assertion failures when generating name lookup table in modules. (#GH61065, #GH134739) -- Fixed an assertion failure in constant compound literal statements. (#GH139160) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -727,8 +709,6 @@ Bug Fixes to C++ Support - Fixed the handling of pack indexing types in the constraints of a member function redeclaration. (#GH138255) - Clang now correctly parses arbitrary order of ``[[]]``, ``__attribute__`` and ``alignas`` attributes for declarations (#GH133107) - Fixed a crash when forming an invalid function type in a dependent context. (#GH138657) (#GH115725) (#GH68852) -- Clang no longer segfaults when there is a configuration mismatch between modules and their users (http://crbug.com/400353616). -- Fix an incorrect deduction when calling an explicit object member function template through an overload set address. Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -877,11 +857,6 @@ clang-format - Add ``OneLineFormatOffRegex`` option for turning formatting off for one line. - Add ``SpaceAfterOperatorKeyword`` option. 
-clang-refactor --------------- -- Reject `0` as column or line number in 1-based command-line source locations. - Fixes crash caused by `0` input in `-selection=::[-:]`. (#GH139457) - libclang -------- - Fixed a bug in ``clang_File_isEqual`` that sometimes led to different @@ -900,8 +875,6 @@ libclang Code Completion --------------- -- Reject `0` as column or line number in 1-based command-line source locations. - Fixes crash caused by `0` input in `-code-completion-at=::`. (#GH139457) Static Analyzer --------------- diff --git a/clang/docs/analyzer/user-docs.rst b/clang/docs/analyzer/user-docs.rst index 67c1dfaa40965..e265f033a2c54 100644 --- a/clang/docs/analyzer/user-docs.rst +++ b/clang/docs/analyzer/user-docs.rst @@ -8,7 +8,6 @@ Contents: user-docs/Installation user-docs/CommandLineUsage - user-docs/Options user-docs/UsingWithXCode user-docs/FilingBugs user-docs/CrossTranslationUnit diff --git a/clang/docs/analyzer/user-docs/CommandLineUsage.rst b/clang/docs/analyzer/user-docs/CommandLineUsage.rst index 0252de80b788f..59f8187f374a9 100644 --- a/clang/docs/analyzer/user-docs/CommandLineUsage.rst +++ b/clang/docs/analyzer/user-docs/CommandLineUsage.rst @@ -194,8 +194,6 @@ When compiling your application to run on the simulator, it is important that ** If you aren't certain which compiler Xcode uses to build your project, try just running ``xcodebuild`` (without **scan-build**). You should see the full path to the compiler that Xcode is using, and use that as an argument to ``--use-cc``. -.. _command-line-usage-CodeChecker: - CodeChecker ----------- diff --git a/clang/docs/analyzer/user-docs/Options.rst.in b/clang/docs/analyzer/user-docs/Options.rst.in deleted file mode 100644 index 0d2883fb9ead1..0000000000000 --- a/clang/docs/analyzer/user-docs/Options.rst.in +++ /dev/null @@ -1,114 +0,0 @@ -======================== -Configuring the Analyzer -======================== - -The clang static analyzer supports two kinds of options: - -1. 
Global **analyzer options** influence the behavior of the analyzer engine. - They are documented on this page, in the section :ref:`List of analyzer - options`. -2. The **checker options** belong to individual checkers (e.g. - ``core.BitwiseShift:Pedantic`` and ``unix.Stream:Pedantic`` are completely - separate options) and customize the behavior of that particular checker. - These are documented within the documentation of each individual checker at - :doc:`../checkers`. - -Assigning values to options -=========================== - -With the compiler frontend --------------------------- - -All options can be configured by using the ``-analyzer-config`` flag of ``clang --cc1`` (the so-called *compiler frontend* part of clang). The values of the -options are specified with the syntax ``-analyzer-config -OPT=VAL,OPT2=VAL2,...`` which supports specifying multiple options, but -separate flags like ``-analyzer-config OPT=VAL -analyzer-config OPT2=VAL2`` are -also accepted (with equivalent behavior). Analyzer options and checker options -can be freely intermixed here because it's easy to recognize that checker -option names are always prefixed with ``some.groups.NameOfChecker:``. - -.. warning:: - This is an internal interface, one should prefer `clang --analyze ...` for - regular use. Clang does not intend to preserve backwards compatibility or - announce breaking changes within the flags accepted by ``clang -cc1`` - (but ``-analyzer-config`` survived many years without major changes). - -With the clang driver ---------------------- - -In a conventional workflow ``clang -cc1`` (which is a low-level internal -interface) is invoked indirectly by the clang *driver* (i.e. plain ``clang`` -without the ``-cc1`` flag), which acts as an "even more frontend" wrapper layer -around the ``clang -cc1`` *compiler frontend*. In this situation **each** -command line argument intended for the *compiler frontend* must be prefixed -with ``-Xclang``. 
- -For example the following command analyzes ``foo.c`` in :ref:`shallow mode -` with :ref:`loop unrolling -`: - -:: - - clang --analyze -Xclang -analyzer-config -Xclang mode=shallow,unroll-loops=true foo.c - -When this is executed, the *driver* will compose and execute the following -``clang -cc1`` command (which can be inspected by passing the ``-v`` flag to -the *driver*): - -:: - - clang -cc1 -analyze [...] -analyzer-config mode=shallow,unroll-loops=true foo.c - -Here ``[...]`` stands for dozens of low-level flags which ensure that ``clang --cc1`` does the right thing (e.g. ``-fcolor-diagnostics`` when it's suitable; -``-analyzer-checker`` flags to enable the default set of checkers). Also -note the distinction that the ``clang`` *driver* requires ``--analyze`` (double -dashes) while the ``clang -cc1`` *compiler frontend* requires ``-analyze`` -(single dash). - -.. note:: - The flag ``-Xanalyzer`` is equivalent to ``-Xclang`` in these situations - (but doesn't forward other options of the clang frontend). - -With CodeChecker ----------------- - -If the analysis is performed through :ref:`CodeChecker -` (which e.g. supports the analysis of a whole -project instead of a single file) then it will act as another indirection -layer. CodeChecker provides separate command-line flags called -``--analyzer-config`` (for analyzer options) and ``--checker-config`` (for -checker options): - -:: - - CodeChecker analyze -o outdir --checker-config clangsa:unix.Stream:Pedantic=true \ - --analyzer-config clangsa:mode=shallow clangsa:unroll-loops=true \ - -- compile_commands.json - -These CodeChecker flags may be followed by multiple ``OPT=VAL`` pairs as -separate arguments (and this is why the example needs to use ``--`` before -``compile_commands.json``). The option names are all prefixed with ``clangsa:`` -to ensure that they are passed to the clang static analyzer (and not other -analyzer tools that are also supported by CodeChecker). - -.. 
_list-of-analyzer-options: - -List of analyzer options -======================== - -.. warning:: - These options are primarily intended for development purposes and - non-default values are usually unsupported. Changing their values may - drastically alter the behavior of the analyzer, and may even result in - instabilities or crashes! Crash reports are welcome and depending on the - severity they may be fixed. - -.. - The contents of this section are automatically generated by the script - clang/docs/tools/generate_analyzer_options_docs.py from the header file - AnalyzerOptions.def to ensure that the RST/web documentation is synchronized - with the command line help options. - -.. OPTIONS_LIST_PLACEHOLDER diff --git a/clang/docs/tools/generate_analyzer_options_docs.py b/clang/docs/tools/generate_analyzer_options_docs.py deleted file mode 100644 index 26c098d8514a0..0000000000000 --- a/clang/docs/tools/generate_analyzer_options_docs.py +++ /dev/null @@ -1,293 +0,0 @@ -#!/usr/bin/env python3 -# A tool to automatically generate documentation for the config options of the -# clang static analyzer by reading `AnalyzerOptions.def`. - -import argparse -from collections import namedtuple -from enum import Enum, auto -import re -import sys -import textwrap - - -# The following code implements a trivial parser for the narrow subset of C++ -# which is used in AnalyzerOptions.def. This supports the following features: -# - ignores preprocessor directives, even if they are continued with \ at EOL -# - ignores comments: both /* ... */ and // ... -# - parses string literals (even if they contain \" escapes) -# - concatenates adjacent string literals -# - parses numbers even if they contain ' as a thousands separator -# - recognizes MACRO(arg1, arg2, ..., argN) calls - - -class TT(Enum): - "Token type enum." 
- number = auto() - ident = auto() - string = auto() - punct = auto() - - -TOKENS = [ - (re.compile(r"-?[0-9']+"), TT.number), - (re.compile(r"\w+"), TT.ident), - (re.compile(r'"([^\\"]|\\.)*"'), TT.string), - (re.compile(r"[(),]"), TT.punct), - (re.compile(r"/\*((?!\*/).)*\*/", re.S), None), # C-style comment - (re.compile(r"//.*\n"), None), # C++ style oneline comment - (re.compile(r"#.*(\\\n.*)*(?", which is - # OK for a terse command line printout, but should be prettified for web - # documentation. - # Moreover, the option ctu-invocation-list shows some example file content - # which is formatted as a preformatted block. - paragraphs = [desc] - extra = "" - if m := re.search(r"(^|\s)Value:", desc): - err_handler.record_use_of_tweak("accepted values") - paragraphs = [desc[: m.start()], "Accepted values:" + desc[m.end() :]] - elif m := re.search(r"\s*Example file.content:", desc): - err_handler.record_use_of_tweak("example file content") - paragraphs = [desc[: m.start()]] - extra = "Example file content::\n\n " + desc[m.end() :] + "\n\n" - - wrapped = [textwrap.fill(p, width=80) for p in paragraphs if p.strip()] - - return "\n\n".join(wrapped + [""]) + extra - - -def default_to_rst(tok): - if tok.kind == TT.string: - if tok.code == '""': - return "(empty string)" - return tok.code - if tok.kind == TT.ident: - return tok.code - if tok.kind == TT.number: - return tok.code.replace("'", "") - raise ValueError(f"unexpected token as default value: {tok.kind.name}") - - -def defaults_to_rst_paragraph(defaults): - strs = [default_to_rst(d) for d in defaults] - - if len(strs) == 1: - return f"Default value: {strs[0]}\n\n" - if len(strs) == 2: - return ( - f"Default value: {strs[0]} (in shallow mode) / {strs[1]} (in deep mode)\n\n" - ) - raise ValueError("unexpected count of default values: %d" % len(defaults)) - - -def macro_call_to_rst_paragraphs(macro_call): - try: - arg_count = len(macro_call.args) - param_count = MACRO_NAMES_PARAMCOUNTS[macro_call.name] - if 
arg_count != param_count: - raise ValueError( - f"expected {param_count} arguments for {macro_call.name}, found {arg_count}" - ) - - _, _, cmdflag, desc, *defaults = macro_call.args - - return ( - cmdflag_to_rst_title(cmdflag) - + desc_to_rst_paragraphs(desc) - + defaults_to_rst_paragraph(defaults) - ) - except ValueError as ve: - err_handler.report_error(ve.args[0]) - return "" - - -def get_option_list(input_file): - with open(input_file, encoding="utf-8") as f: - contents = f.read() - tokens = join_strings(tokenize(contents)) - macro_calls = get_calls(tokens, MACRO_NAMES_PARAMCOUNTS) - - result = "" - for mc in macro_calls: - result += macro_call_to_rst_paragraphs(mc) - return result - - -p = argparse.ArgumentParser() -p.add_argument("--options-def", help="path to AnalyzerOptions.def") -p.add_argument("--template", help="template file") -p.add_argument("--out", help="output file") -opts = p.parse_args() - -with open(opts.template, encoding="utf-8") as f: - doc_template = f.read() - -PLACEHOLDER = ".. 
OPTIONS_LIST_PLACEHOLDER\n" - -rst_output = doc_template.replace(PLACEHOLDER, get_option_list(opts.options_def)) - -err_handler.report_unused_tweaks() - -with open(opts.out, "w", newline="", encoding="utf-8") as f: - f.write(rst_output) - -if err_handler.seen_errors: - sys.exit(1) diff --git a/clang/include/clang/AST/ASTConcept.h b/clang/include/clang/AST/ASTConcept.h index c8f6330a73bb1..078e1e848f393 100644 --- a/clang/include/clang/AST/ASTConcept.h +++ b/clang/include/clang/AST/ASTConcept.h @@ -93,11 +93,11 @@ struct ASTConstraintSatisfaction final : bool ContainsErrors : 1; const UnsatisfiedConstraintRecord *begin() const { - return getTrailingObjects(); + return getTrailingObjects(); } const UnsatisfiedConstraintRecord *end() const { - return getTrailingObjects() + NumRecords; + return getTrailingObjects() + NumRecords; } ASTConstraintSatisfaction(const ASTContext &C, diff --git a/clang/include/clang/AST/ASTDiagnostic.h b/clang/include/clang/AST/ASTDiagnostic.h index baa410e3e4a03..ef22249828629 100644 --- a/clang/include/clang/AST/ASTDiagnostic.h +++ b/clang/include/clang/AST/ASTDiagnostic.h @@ -38,9 +38,6 @@ namespace clang { /// is initialized before passing it in. QualType desugarForDiagnostic(ASTContext &Context, QualType QT, bool &ShouldAKA); - - std::string FormatUTFCodeUnitAsCodepoint(unsigned Value, QualType T); - } // end namespace clang #endif diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 9290ff3764c8c..f1013c57e008f 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -185,7 +185,7 @@ class PragmaCommentDecl final PragmaMSCommentKind getCommentKind() const { return CommentKind; } - StringRef getArg() const { return getTrailingObjects(); } + StringRef getArg() const { return getTrailingObjects(); } // Implement isa/cast/dyncast/etc. 
static bool classof(const Decl *D) { return classofKind(D->getKind()); } @@ -217,8 +217,8 @@ class PragmaDetectMismatchDecl final static PragmaDetectMismatchDecl * CreateDeserialized(ASTContext &C, GlobalDeclID ID, unsigned NameValueSize); - StringRef getName() const { return getTrailingObjects(); } - StringRef getValue() const { return getTrailingObjects() + ValueStart; } + StringRef getName() const { return getTrailingObjects(); } + StringRef getValue() const { return getTrailingObjects() + ValueStart; } // Implement isa/cast/dyncast/etc. static bool classof(const Decl *D) { return classofKind(D->getKind()); } @@ -1991,7 +1991,7 @@ class FunctionDecl : public DeclaratorDecl, /// Get the unqualified lookup results that should be used in this /// defaulted function definition. ArrayRef getUnqualifiedLookups() const { - return getTrailingObjects(NumLookups); + return {getTrailingObjects(), NumLookups}; } StringLiteral *getDeletedMessage() const { @@ -4780,9 +4780,13 @@ class OutlinedFunctionDecl final explicit OutlinedFunctionDecl(DeclContext *DC, unsigned NumParams); - ImplicitParamDecl *const *getParams() const { return getTrailingObjects(); } + ImplicitParamDecl *const *getParams() const { + return getTrailingObjects(); + } - ImplicitParamDecl **getParams() { return getTrailingObjects(); } + ImplicitParamDecl **getParams() { + return getTrailingObjects(); + } public: friend class ASTDeclReader; @@ -4853,9 +4857,13 @@ class CapturedDecl final explicit CapturedDecl(DeclContext *DC, unsigned NumParams); - ImplicitParamDecl *const *getParams() const { return getTrailingObjects(); } + ImplicitParamDecl *const *getParams() const { + return getTrailingObjects(); + } - ImplicitParamDecl **getParams() { return getTrailingObjects(); } + ImplicitParamDecl **getParams() { + return getTrailingObjects(); + } public: friend class ASTDeclReader; @@ -5179,10 +5187,12 @@ class HLSLRootSignatureDecl final unsigned NumElems; - llvm::hlsl::rootsig::RootElement *getElems() { return 
getTrailingObjects(); } + llvm::hlsl::rootsig::RootElement *getElems() { + return getTrailingObjects(); + } const llvm::hlsl::rootsig::RootElement *getElems() const { - return getTrailingObjects(); + return getTrailingObjects(); } HLSLRootSignatureDecl(DeclContext *DC, SourceLocation Loc, IdentifierInfo *ID, diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 37419d8eb7c9a..80c97681d9163 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -712,7 +712,7 @@ class DependentFunctionTemplateSpecializationInfo final /// Returns the candidates for the primary function template. ArrayRef getCandidates() const { - return getTrailingObjects(NumCandidates); + return {getTrailingObjects(), NumCandidates}; } }; @@ -1325,7 +1325,8 @@ class TemplateTypeParmDecl final : public TypeDecl, /// Returns the type constraint associated with this template parameter (if /// any). const TypeConstraint *getTypeConstraint() const { - return TypeConstraintInitialized ? getTrailingObjects() : nullptr; + return TypeConstraintInitialized ? getTrailingObjects() : + nullptr; } void setTypeConstraint(ConceptReference *CR, @@ -1710,7 +1711,7 @@ class TemplateTemplateParmDecl final /// pack. 
TemplateParameterList *getExpansionTemplateParameters(unsigned I) const { assert(I < NumExpandedParams && "Out-of-range expansion type index"); - return getTrailingObjects()[I]; + return getTrailingObjects()[I]; } const DefArgStorage &getDefaultArgStorage() const { return DefaultArgument; } @@ -3253,7 +3254,8 @@ class ImplicitConceptSpecializationDecl final unsigned NumTemplateArgs); ArrayRef getTemplateArguments() const { - return getTrailingObjects(NumTemplateArgs); + return ArrayRef(getTrailingObjects(), + NumTemplateArgs); } void setTemplateArguments(ArrayRef Converted); diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h index 67fbdfeb0702f..c44e8388337c1 100644 --- a/clang/include/clang/AST/OpenACCClause.h +++ b/clang/include/clang/AST/OpenACCClause.h @@ -293,7 +293,7 @@ class OpenACCDeviceTypeClause final "Only a single asterisk version is permitted, and must be the " "only one"); - llvm::uninitialized_copy(Archs, getTrailingObjects()); + llvm::uninitialized_copy(Archs, getTrailingObjects()); } public: @@ -307,7 +307,8 @@ class OpenACCDeviceTypeClause final } ArrayRef getArchitectures() const { - return getTrailingObjects(NumArchs); + return ArrayRef( + getTrailingObjects(), NumArchs); } static OpenACCDeviceTypeClause * @@ -420,7 +421,9 @@ class OpenACCSelfClause final // Intentionally internal, meant to be an implementation detail of everything // else. All non-internal uses should go through getConditionExpr/getVarList. 
- ArrayRef getExprs() const { return getTrailingObjects(NumExprs); } + llvm::ArrayRef getExprs() const { + return {getTrailingObjects(), NumExprs}; + } public: static bool classof(const OpenACCClause *C) { @@ -469,8 +472,8 @@ class OpenACCSelfClause final child_range children() { return child_range( - reinterpret_cast(getTrailingObjects()), - reinterpret_cast(getTrailingObjects() + NumExprs)); + reinterpret_cast(getTrailingObjects()), + reinterpret_cast(getTrailingObjects() + NumExprs)); } const_child_range children() const { @@ -543,10 +546,10 @@ class OpenACCWaitClause final QueuesLoc(QueuesLoc) { // The first element of the trailing storage is always the devnum expr, // whether it is used or not. - auto *Exprs = getTrailingObjects(); + auto *Exprs = getTrailingObjects(); llvm::uninitialized_copy(ArrayRef(DevNumExpr), Exprs); llvm::uninitialized_copy(QueueIdExprs, Exprs + 1); - setExprs(getTrailingObjects(QueueIdExprs.size() + 1)); + setExprs(getTrailingObjects(QueueIdExprs.size() + 1)); } public: @@ -583,7 +586,7 @@ class OpenACCNumGangsClause final ArrayRef IntExprs, SourceLocation EndLoc) : OpenACCClauseWithExprs(OpenACCClauseKind::NumGangs, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(IntExprs.size()), IntExprs); + setExprs(getTrailingObjects(IntExprs.size()), IntExprs); } public: @@ -611,7 +614,7 @@ class OpenACCTileClause final ArrayRef SizeExprs, SourceLocation EndLoc) : OpenACCClauseWithExprs(OpenACCClauseKind::Tile, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(SizeExprs.size()), SizeExprs); + setExprs(getTrailingObjects(SizeExprs.size()), SizeExprs); } public: @@ -848,7 +851,7 @@ class OpenACCPrivateClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Private, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -869,7 +872,7 @@ class OpenACCFirstPrivateClause final 
ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::FirstPrivate, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -890,7 +893,7 @@ class OpenACCDevicePtrClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::DevicePtr, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -911,7 +914,7 @@ class OpenACCAttachClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Attach, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -932,7 +935,7 @@ class OpenACCDetachClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Detach, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -953,7 +956,7 @@ class OpenACCDeleteClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Delete, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -974,7 +977,7 @@ class OpenACCUseDeviceClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::UseDevice, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -995,7 +998,7 @@ class OpenACCNoCreateClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::NoCreate, BeginLoc, LParenLoc, EndLoc) { - 
setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1016,7 +1019,7 @@ class OpenACCPresentClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Present, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1036,7 +1039,7 @@ class OpenACCHostClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Host, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1058,7 +1061,7 @@ class OpenACCDeviceClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Device, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1085,7 +1088,7 @@ class OpenACCCopyClause final Spelling == OpenACCClauseKind::PCopy || Spelling == OpenACCClauseKind::PresentOrCopy) && "Invalid clause kind for copy-clause"); - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1118,7 +1121,7 @@ class OpenACCCopyInClause final Spelling == OpenACCClauseKind::PCopyIn || Spelling == OpenACCClauseKind::PresentOrCopyIn) && "Invalid clause kind for copyin-clause"); - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1150,7 +1153,7 @@ class OpenACCCopyOutClause final Spelling == OpenACCClauseKind::PCopyOut || Spelling == OpenACCClauseKind::PresentOrCopyOut) && "Invalid clause kind for copyout-clause"); - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1182,7 +1185,7 @@ class 
OpenACCCreateClause final Spelling == OpenACCClauseKind::PCreate || Spelling == OpenACCClauseKind::PresentOrCreate) && "Invalid clause kind for create-clause"); - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1211,7 +1214,7 @@ class OpenACCReductionClause final : OpenACCClauseWithVarList(OpenACCClauseKind::Reduction, BeginLoc, LParenLoc, EndLoc), Op(Operator) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1236,7 +1239,7 @@ class OpenACCLinkClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::Link, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: @@ -1259,7 +1262,7 @@ class OpenACCDeviceResidentClause final ArrayRef VarList, SourceLocation EndLoc) : OpenACCClauseWithVarList(OpenACCClauseKind::DeviceResident, BeginLoc, LParenLoc, EndLoc) { - setExprs(getTrailingObjects(VarList.size()), VarList); + setExprs(getTrailingObjects(VarList.size()), VarList); } public: diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 6fd16bc0f03be..cdecc812f7fb9 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -295,7 +295,8 @@ template class OMPVarListClause : public OMPClause { /// Fetches list of variables associated with this clause. MutableArrayRef getVarRefs() { - return static_cast(this)->template getTrailingObjects(NumVars); + return MutableArrayRef( + static_cast(this)->template getTrailingObjects(), NumVars); } /// Sets the list of variables for this clause. @@ -335,7 +336,8 @@ template class OMPVarListClause : public OMPClause { /// Fetches list of all variables in the clause. 
ArrayRef getVarRefs() const { - return static_cast(this)->template getTrailingObjects( + return llvm::ArrayRef( + static_cast(this)->template getTrailingObjects(), NumVars); } }; @@ -380,8 +382,10 @@ template class OMPDirectiveListClause : public OMPClause { } MutableArrayRef getDirectiveKinds() { - return static_cast(this) - ->template getTrailingObjects(NumKinds); + return MutableArrayRef( + static_cast(this) + ->template getTrailingObjects(), + NumKinds); } void setDirectiveKinds(ArrayRef DK) { @@ -980,12 +984,14 @@ class OMPSizesClause final /// Returns the tile size expressions. MutableArrayRef getSizesRefs() { - return static_cast(this) - ->template getTrailingObjects(NumSizes); + return MutableArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumSizes); } ArrayRef getSizesRefs() const { - return static_cast(this) - ->template getTrailingObjects(NumSizes); + return ArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumSizes); } /// Sets the tile size expressions. @@ -1084,12 +1090,14 @@ class OMPPermutationClause final /// Returns the permutation index expressions. 
///@{ MutableArrayRef getArgsRefs() { - return static_cast(this) - ->template getTrailingObjects(NumLoops); + return MutableArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumLoops); } ArrayRef getArgsRefs() const { - return static_cast(this) - ->template getTrailingObjects(NumLoops); + return ArrayRef(static_cast(this) + ->template getTrailingObjects(), + NumLoops); } ///@} @@ -3833,7 +3841,7 @@ class OMPReductionClause final return MutableArrayRef(getLHSExprs().end(), varlist_size()); } ArrayRef getRHSExprs() const { - return ArrayRef(getLHSExprs().end(), varlist_size()); + return llvm::ArrayRef(getLHSExprs().end(), varlist_size()); } /// Set list of helper reduction expressions, required for proper @@ -5917,15 +5925,18 @@ class OMPMappableExprListClause : public OMPVarListClause, /// Get the unique declarations that are in the trailing objects of the /// class. MutableArrayRef getUniqueDeclsRef() { - return static_cast(this)->template getTrailingObjects( + return MutableArrayRef( + static_cast(this)->template getTrailingObjects(), NumUniqueDeclarations); } /// Get the unique declarations that are in the trailing objects of the /// class. ArrayRef getUniqueDeclsRef() const { - return static_cast(this) - ->template getTrailingObjects(NumUniqueDeclarations); + return ArrayRef( + static_cast(this) + ->template getTrailingObjects(), + NumUniqueDeclarations); } /// Set the unique declarations that are in the trailing objects of the @@ -5939,14 +5950,16 @@ class OMPMappableExprListClause : public OMPVarListClause, /// Get the number of lists per declaration that are in the trailing /// objects of the class. MutableArrayRef getDeclNumListsRef() { - return static_cast(this)->template getTrailingObjects( + return MutableArrayRef( + static_cast(this)->template getTrailingObjects(), NumUniqueDeclarations); } /// Get the number of lists per declaration that are in the trailing /// objects of the class. 
ArrayRef getDeclNumListsRef() const { - return static_cast(this)->template getTrailingObjects( + return ArrayRef( + static_cast(this)->template getTrailingObjects(), NumUniqueDeclarations); } @@ -5986,14 +5999,18 @@ class OMPMappableExprListClause : public OMPVarListClause, /// Get the components that are in the trailing objects of the class. MutableArrayRef getComponentsRef() { - return static_cast(this) - ->template getTrailingObjects(NumComponents); + return MutableArrayRef( + static_cast(this) + ->template getTrailingObjects(), + NumComponents); } /// Get the components that are in the trailing objects of the class. ArrayRef getComponentsRef() const { - return static_cast(this) - ->template getTrailingObjects(NumComponents); + return ArrayRef( + static_cast(this) + ->template getTrailingObjects(), + NumComponents); } /// Set the components that are in the trailing objects of the class. diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 5c8c0e1cf1d00..180f3623983de 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2521,7 +2521,6 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isChar16Type() const; bool isChar32Type() const; bool isAnyCharacterType() const; - bool isUnicodeCharacterType() const; bool isIntegralType(const ASTContext &Ctx) const; /// Determine whether this type is an integral or enumeration type. 
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 187d3b5ed24a7..11b1e247237a7 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4819,12 +4819,6 @@ def HLSLResourceHandleFromBinding : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } -def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> { - let Spellings = ["__builtin_hlsl_resource_handlefromimplicitbinding"]; - let Attributes = [NoThrow]; - let Prototype = "void(...)"; -} - def HLSLAll : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_all"]; let Attributes = [NoThrow, Const]; diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 4da8f80345ddc..b15cba698030c 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -670,10 +670,6 @@ def note_drv_verify_prefix_spelling : Note< "-verify prefixes must start with a letter and contain only alphanumeric" " characters, hyphens, and underscores">; -def note_command_line_code_loc_requirement - : Note<"-code-completion-at=:: requires and " - " to be integers greater than zero">; - def warn_drv_global_isel_incomplete : Warning< "-fglobal-isel support for the '%0' architecture is incomplete">, InGroup; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 616f2555931f5..5a3e756f07ecc 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -111,7 +111,6 @@ def EnumConversion : DiagGroup<"enum-conversion", ImplicitEnumEnumCast, EnumFloatConversion, EnumCompareConditional]>; -def CharacterConversion : DiagGroup<"character-conversion">; def DeprecatedOFast : DiagGroup<"deprecated-ofast">; def ObjCSignedCharBoolImplicitIntConversion : DiagGroup<"objc-signed-char-bool-implicit-int-conversion">; @@ -1120,7 
+1119,6 @@ def Parentheses : DiagGroup<"parentheses", // - __null-to-integer conversion warnings are on by default def Conversion : DiagGroup<"conversion", [BoolConversion, - CharacterConversion, ConstantConversion, EnumConversion, BitFieldEnumConversion, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f0bd5a1174020..6e940a318b61d 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -4369,29 +4369,6 @@ def warn_address_of_reference_bool_conversion : Warning< "code; pointer may be assumed to always convert to true">, InGroup; -def warn_impcast_unicode_char_type - : Warning<"implicit conversion from %0 to %1 may change the meaning of the " - "represented code unit">, - InGroup; -def warn_impcast_unicode_precision - : Warning<"implicit conversion from %0 to %1 may lose precision and change " - "the meaning of the represented code unit">, - InGroup; -def warn_impcast_unicode_char_type_constant - : Warning<"implicit conversion from %0 to %1 changes the meaning of the " - "%select{code unit|code point}2 '%3'">, - InGroup; - -def warn_comparison_unicode_mixed_types - : Warning<"comparing values of different Unicode code unit types %0 and %1 " - "may compare different code points">, - InGroup; - -def warn_comparison_unicode_mixed_types_constant - : Warning<"comparing values of different Unicode code unit types %0 and %1 " - "compares unrelated code units '%2' and '%3'">, - InGroup; - def warn_xor_used_as_pow : Warning< "result of '%0' is %1; did you mean exponentiation?">, InGroup; @@ -6857,7 +6834,7 @@ def err_counted_by_on_incomplete_type_on_use : Error < def note_counted_by_consider_completing_pointee_ty : Note< "consider providing a complete definition for %0">; - + def note_counted_by_consider_using_sized_by : Note< "consider using '__sized_by%select{|_or_null}0' instead of " "'__counted_by%select{|_or_null}0'">; @@ -7756,11 +7733,6 
@@ def warn_comparison_of_mixed_enum_types_switch : Warning< "%diff{ ($ and $)|}0,1">, InGroup; -def warn_arith_conv_mixed_unicode_types - : Warning<"%sub{select_arith_conv_kind}0 " - "different Unicode character types %1 and %2">, - InGroup; - def err_typecheck_assign_const : Error< "%select{" "cannot assign to return value because function %1 returns a const value|" diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 3d035f0a5f787..62cc8acf9588b 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -888,7 +888,7 @@ class VisibleModuleSet { /// Get the location at which the import of a module was triggered. SourceLocation getImportLoc(const Module *M) const { - return M && M->getVisibilityID() < ImportLocs.size() + return M->getVisibilityID() < ImportLocs.size() ? ImportLocs[M->getVisibilityID()] : SourceLocation(); } diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 7251cc2d1759a..ab0051efe5159 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -279,10 +279,10 @@ def OP_CVT_F32_BF16 // Splat operation - performs a range-checked splat over a vector def SPLAT : WInst<"splat_lane", ".(!q)I", - "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm", + "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", [ImmCheck<1, ImmCheckLaneIndex, 0>]>; def SPLATQ : WInst<"splat_laneq", ".(!Q)I", - "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm", + "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl", [ImmCheck<1, ImmCheckLaneIndex, 0>]>; let TargetGuard = "bf16,neon" in { @@ -547,19 +547,19 @@ def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh", // E.3.16 Extract lanes from a vector let InstName = "vmov" in def VGET_LANE : IInst<"vget_lane", "1.I", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlmQm", + "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", [ImmCheck<1, 
ImmCheckLaneIndex, 0>]>; //////////////////////////////////////////////////////////////////////////////// // E.3.17 Set lanes within a vector let InstName = "vmov" in def VSET_LANE : IInst<"vset_lane", ".1.I", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlmQm", + "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", [ImmCheck<2, ImmCheckLaneIndex, 1>]>; //////////////////////////////////////////////////////////////////////////////// // E.3.18 Initialize a vector from bit pattern -def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPslm", OP_CAST> { +def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> { let BigEndianSafe = 1; } @@ -567,20 +567,20 @@ def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPslm", OP_CAST> // E.3.19 Set all lanes to same value let InstName = "vmov" in { def VDUP_N : WOpInst<"vdup_n", ".1", - "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm", + "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>; def VMOV_N : WOpInst<"vmov_n", ".1", - "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm", + "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>; } let InstName = "" in def VDUP_LANE: WOpInst<"vdup_lane", ".qI", - "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUlmQm", + "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP_LN>; //////////////////////////////////////////////////////////////////////////////// // E.3.20 Combining vectors -def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPsm", OP_CONC>; +def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// // E.3.21 Splitting vectors @@ -589,8 +589,8 @@ def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPsm", OP_CONC>; // versions of these intrinsics in both AArch32 and AArch64 architectures. See // D45668 for more details. 
let InstName = "vmov" in { -def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPsm", OP_HI>; -def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPsm", OP_LO>; +def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>; +def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>; } //////////////////////////////////////////////////////////////////////////////// @@ -619,16 +619,16 @@ def VQMOVUN : SInst<"vqmovun", "(; //////////////////////////////////////////////////////////////////////////////// // E.3.23-24 Table lookup, Extended table lookup let InstName = "vtbl" in { -def VTBL1 : WInst<"vtbl1", "..p", "UccPcm">; -def VTBL2 : WInst<"vtbl2", ".2p", "UccPcm">; -def VTBL3 : WInst<"vtbl3", ".3p", "UccPcm">; -def VTBL4 : WInst<"vtbl4", ".4p", "UccPcm">; +def VTBL1 : WInst<"vtbl1", "..p", "UccPc">; +def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">; +def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">; +def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">; } let InstName = "vtbx" in { -def VTBX1 : WInst<"vtbx1", "...p", "UccPcm">; -def VTBX2 : WInst<"vtbx2", "..2p", "UccPcm">; -def VTBX3 : WInst<"vtbx3", "..3p", "UccPcm">; -def VTBX4 : WInst<"vtbx4", "..4p", "UccPcm">; +def VTBX1 : WInst<"vtbx1", "...p", "UccPc">; +def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">; +def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">; +def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">; } //////////////////////////////////////////////////////////////////////////////// @@ -677,15 +677,15 @@ def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>; //////////////////////////////////////////////////////////////////////////////// // E.3.26 Vector Extract def VEXT : WInst<"vext", "...I", - "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQfmQm", + "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf", [ImmCheck<2, ImmCheckLaneIndex, 0>]>; //////////////////////////////////////////////////////////////////////////////// // E.3.27 Reverse vector elements -def VREV64 : 
WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQfmQm", +def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", OP_REV64>; -def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPsmQm", OP_REV32>; -def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPcmQm", OP_REV16>; +def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; +def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>; //////////////////////////////////////////////////////////////////////////////// // E.3.28 Other single operand arithmetic @@ -709,13 +709,13 @@ def VBIC : LOpInst<"vbic", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; let isHiddenLInst = 1 in def VBSL : SInst<"vbsl", ".U..", - "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsmQm">; + "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.30 Transposition operations -def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">; -def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">; -def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPsmQm">; +def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// @@ -1028,19 +1028,19 @@ def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl", def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl", [ImmCheck<2, ImmCheckLaneIndex, 1>]>; def COPY_LANE : IOpInst<"vcopy_lane", "..I.I", - "csilUcUsUiUlPcPsPlfdm", OP_COPY_LN>; + "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>; def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI", - "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPlQm", OP_COPY_LN>; + 
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI", - "csilPcPsPlUcUsUiUlfdm", OP_COPY_LN>; + "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>; def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I", - "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPlQm", OP_COPY_LN>; + "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "dQdPlQPl", OP_DUP_LN>; def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI", - "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPlmQm", + "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>; def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>; @@ -1266,31 +1266,31 @@ def FMINNM_S64 : SInst<"vminnm", "...", "dQd">; //////////////////////////////////////////////////////////////////////////////// // Permutation def VTRN1 : SOpInst<"vtrn1", "...", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_TRN1>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>; def VZIP1 : SOpInst<"vzip1", "...", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_ZIP1>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>; def VUZP1 : SOpInst<"vuzp1", "...", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_UZP1>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>; def VTRN2 : SOpInst<"vtrn2", "...", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_TRN2>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>; def VZIP2 : SOpInst<"vzip2", "...", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_ZIP2>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>; def VUZP2 : SOpInst<"vuzp2", "...", - "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPlmQm", OP_UZP2>; + "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>; 
//////////////////////////////////////////////////////////////////////////////// // Table lookup let InstName = "vtbl" in { -def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPcmQm">; -def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPcmQm">; -def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPcmQm">; -def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPcmQm">; +def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">; +def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">; +def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">; +def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">; } let InstName = "vtbx" in { -def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPcmQm">; -def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPcmQm">; -def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPcmQm">; -def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPcmQm">; +def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">; +def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">; +def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">; +def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; } //////////////////////////////////////////////////////////////////////////////// @@ -1654,9 +1654,9 @@ def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_Q def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN>; } // TargetGuard = "v8.1a" -def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPsSm", +def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", [ImmCheck<1, ImmCheckLaneIndex, 0>]>; -def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPsSm", +def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs", [ImmCheck<1, ImmCheckLaneIndex, 0>]>; } // ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)" @@ -2090,17 +2090,17 @@ let ArchGuard = 
"defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r // Lookup table read with 2-bit/4-bit indices let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in { - def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcmQcQUcQPcQm", + def VLUTI2_B : SInst<"vluti2_lane", "Q.(qU)I", "cUcPcQcQUcQPc", [ImmCheck<2, ImmCheck0_1>]>; - def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcmQcQUcQPcQm", + def VLUTI2_B_Q : SInst<"vluti2_laneq", "Q.(QU)I", "cUcPcQcQUcQPc", [ImmCheck<2, ImmCheck0_3>]>; def VLUTI2_H : SInst<"vluti2_lane", "Q.(]>; def VLUTI2_H_Q : SInst<"vluti2_laneq", "Q.(]>; - def VLUTI4_B : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPcQm", + def VLUTI4_B : SInst<"vluti4_lane", "..(qU)I", "QcQUcQPc", [ImmCheck<2, ImmCheck0_0>]>; - def VLUTI4_B_Q : SInst<"vluti4_laneq", "..UI", "QcQUcQPcQm", + def VLUTI4_B_Q : SInst<"vluti4_laneq", "..UI", "QcQUcQPc", [ImmCheck<2, ImmCheck0_1>]>; def VLUTI4_H_X2 : SInst<"vluti4_lane_x2", ".2(]>; diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index e08f372450285..9f5fa266742e8 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -269,7 +269,7 @@ def PtrStrideOp : CIR_Op<"ptr_stride", let extraClassDeclaration = [{ // Get type pointed by the base pointer. mlir::Type getElementTy() { - return getBase().getType().getPointee(); + return mlir::cast(getBase().getType()).getPointee(); } }]; } @@ -1710,7 +1710,7 @@ def GetMemberOp : CIR_Op<"get_member"> { /// Return the result type. 
cir::PointerType getResultTy() { - return getResult().getType(); + return mlir::cast(getResult().getType()); } }]; diff --git a/clang/include/clang/Frontend/CommandLineSourceLoc.h b/clang/include/clang/Frontend/CommandLineSourceLoc.h index b07ffcb65c067..074800a881a89 100644 --- a/clang/include/clang/Frontend/CommandLineSourceLoc.h +++ b/clang/include/clang/Frontend/CommandLineSourceLoc.h @@ -24,9 +24,7 @@ namespace clang { /// A source location that has been parsed on the command line. struct ParsedSourceLocation { std::string FileName; - // The 1-based line number unsigned Line; - // The 1-based column number unsigned Column; public: @@ -40,8 +38,7 @@ struct ParsedSourceLocation { // If both tail splits were valid integers, return success. if (!ColSplit.second.getAsInteger(10, PSL.Column) && - !LineSplit.second.getAsInteger(10, PSL.Line) && - !(PSL.Column == 0 || PSL.Line == 0)) { + !LineSplit.second.getAsInteger(10, PSL.Line)) { PSL.FileName = std::string(LineSplit.first); // On the command-line, stdin may be specified via "-". Inside the @@ -92,12 +89,8 @@ struct ParsedSourceRange { // probably belongs to the filename which menas the whole // string should be parsed. RangeSplit.first = Str; - } else { - // Column and line numbers are 1-based. 
- if (EndLine == 0 || EndColumn == 0) - return std::nullopt; + } else HasEndLoc = true; - } } auto Begin = ParsedSourceLocation::FromString(RangeSplit.first); if (Begin.FileName.empty()) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 5ec67087aeea4..6ea7ee281e14d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12576,7 +12576,6 @@ class Sema final : public SemaBase { bool PartialOverloading, bool AggregateDeductionCandidate, bool PartialOrdering, QualType ObjectType, Expr::Classification ObjectClassification, - bool ForOverloadSetAddressResolution, llvm::function_ref)> CheckNonDependent); /// Deduce template arguments when taking the address of a function diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 15182bb27bbdf..e340547ff5f45 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -175,8 +175,6 @@ class SemaHLSL : public SemaBase { // buffer which will be created at the end of the translation unit. 
llvm::SmallVector DefaultCBufferDecls; - uint32_t ImplicitBindingNextOrderID = 0; - private: void collectResourceBindingsOnVarDecl(VarDecl *D); void collectResourceBindingsOnUserRecordDecl(const VarDecl *VD, @@ -184,11 +182,6 @@ class SemaHLSL : public SemaBase { void processExplicitBindingsOnDecl(VarDecl *D); void diagnoseAvailabilityViolations(TranslationUnitDecl *TU); - - bool initGlobalResourceDecl(VarDecl *VD); - uint32_t getNextImplicitBindingOrderID() { - return ImplicitBindingNextOrderID++; - } }; } // namespace clang diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 90b80e5201aa8..fab19c76a22fe 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -7,9 +7,6 @@ //===----------------------------------------------------------------------===// // // This file defines the analyzer options avaible with -analyzer-config. -// Note that clang/docs/tools/generate_analyzer_options_docs.py relies on the -// structure of this file, so if this file is refactored, then make sure to -// update that script as well. // //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h index e995151927c96..3754e25501635 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h @@ -307,9 +307,11 @@ class ExplodedGraph { // Type definitions. using NodeVector = std::vector; - /// The root of the simulation graph. Can be nullptr if the graph is empty or - /// if it was populated by `createUncachedNode()`. - ExplodedNode *Root = nullptr; + /// The roots of the simulation graph. 
Usually there will be only + /// one, but clients are free to establish multiple subgraphs within a single + /// SimulGraph. Moreover, these subgraphs can often merge when paths from + /// different roots reach the same state at the same program location. + NodeVector Roots; /// The nodes in the simulation graph which have been /// specially marked as the endpoint of an abstract simulation path. @@ -343,31 +345,31 @@ class ExplodedGraph { ExplodedGraph(); ~ExplodedGraph(); - /// Get the root node of the graph. This may return nullptr if the graph is - /// empty or under construction. - ExplodedNode *getRoot() const { return Root; } - - /// Retrieve the node associated with a (Location, State) pair, where the - /// 'Location' is a ProgramPoint in the CFG. If no node for this pair exists, - /// it is created. IsNew is set to true if the node was freshly created. + /// Retrieve the node associated with a (Location,State) pair, + /// where the 'Location' is a ProgramPoint in the CFG. If no node for + /// this pair exists, it is created. IsNew is set to true if + /// the node was freshly created. ExplodedNode *getNode(const ProgramPoint &L, ProgramStateRef State, bool IsSink = false, bool* IsNew = nullptr); - /// Create a node for a (Location, State) pair, but don't store it for - /// deduplication later. This is useful when copying some nodes from an - /// already completed ExplodedGraph for further processing. + /// Create a node for a (Location, State) pair, + /// but don't store it for deduplication later. This + /// is useful when copying an already completed + /// ExplodedGraph for further processing. ExplodedNode *createUncachedNode(const ProgramPoint &L, ProgramStateRef State, int64_t Id, bool IsSink = false); - /// Mark a node as the root of the graph. Calling this is an error if the - /// graph already has a root node. 
- void designateAsRoot(ExplodedNode *V) { - assert(V && "Cannot designate nullptr as root!"); - assert(!Root && "The graph already has a root, cannot designate another!"); - Root = V; + std::unique_ptr MakeEmptyGraph() const { + return std::make_unique(); + } + + /// addRoot - Add an untyped node to the set of roots. + ExplodedNode *addRoot(ExplodedNode *V) { + Roots.push_back(V); + return V; } /// addEndOfPath - Add an untyped node to the set of EOP nodes. @@ -376,6 +378,7 @@ class ExplodedGraph { return V; } + unsigned num_roots() const { return Roots.size(); } unsigned num_eops() const { return EndNodes.size(); } bool empty() const { return NumNodes == 0; } @@ -386,6 +389,8 @@ class ExplodedGraph { // Iterators. using NodeTy = ExplodedNode; using AllNodesTy = llvm::FoldingSet; + using roots_iterator = NodeVector::iterator; + using const_roots_iterator = NodeVector::const_iterator; using eop_iterator = NodeVector::iterator; using const_eop_iterator = NodeVector::const_iterator; using node_iterator = AllNodesTy::iterator; @@ -395,6 +400,14 @@ class ExplodedGraph { llvm::iterator_range nodes() const { return Nodes; } + roots_iterator roots_begin() { return Roots.begin(); } + + roots_iterator roots_end() { return Roots.end(); } + + const_roots_iterator roots_begin() const { return Roots.begin(); } + + const_roots_iterator roots_end() const { return Roots.end(); } + eop_iterator eop_begin() { return EndNodes.begin(); } eop_iterator eop_end() { return EndNodes.end(); } @@ -495,7 +508,9 @@ namespace llvm { using ChildIteratorType = clang::ento::ExplodedNode::succ_iterator; using nodes_iterator = llvm::df_iterator; - static NodeRef getEntryNode(const GraphTy G) { return G->getRoot(); } + static NodeRef getEntryNode(const GraphTy G) { + return *G->roots_begin(); + } static bool predecessorOfTrivial(NodeRef N) { return N->succ_size() == 1 && N->getFirstSucc()->isTrivial(); diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h 
b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index b8a4dcbc727a6..285194148d3d3 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -222,8 +222,8 @@ class ExprEngine { const Stmt *getStmt() const; const LocationContext *getRootLocationContext() const { - assert(G.getRoot()); - return G.getRoot()->getLocation().getLocationContext(); + assert(G.roots_begin() != G.roots_end()); + return (*G.roots_begin())->getLocation().getLocationContext(); } ConstCFGElementRef getCFGElementRef() const { diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h index 3105dfa4dae55..d4052ef90de6e 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h @@ -301,10 +301,8 @@ class SMTConstraintManager : public clang::ento::SimpleConstraintManager { llvm_unreachable("Unsupported expression to reason about!"); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Dumps SMT formula LLVM_DUMP_METHOD void dump() const { Solver->dump(); } -#endif protected: // Check whether a new model is satisfiable, and update the program state. diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h index 86774ad5043dd..9e7c98fdded17 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -100,7 +100,41 @@ class SymbolConjured : public SymbolData { ConstCFGElementRef getCFGElementRef() const { return Elem; } // It might return null. 
- const Stmt *getStmt() const; + const Stmt *getStmt() const { + switch (Elem->getKind()) { + case CFGElement::Initializer: + return Elem->castAs().getInitializer()->getInit(); + case CFGElement::ScopeBegin: + return Elem->castAs().getTriggerStmt(); + case CFGElement::ScopeEnd: + return Elem->castAs().getTriggerStmt(); + case CFGElement::NewAllocator: + return Elem->castAs().getAllocatorExpr(); + case CFGElement::LifetimeEnds: + return Elem->castAs().getTriggerStmt(); + case CFGElement::LoopExit: + return Elem->castAs().getLoopStmt(); + case CFGElement::Statement: + return Elem->castAs().getStmt(); + case CFGElement::Constructor: + return Elem->castAs().getStmt(); + case CFGElement::CXXRecordTypedCall: + return Elem->castAs().getStmt(); + case CFGElement::AutomaticObjectDtor: + return Elem->castAs().getTriggerStmt(); + case CFGElement::DeleteDtor: + return Elem->castAs().getDeleteExpr(); + case CFGElement::BaseDtor: + return nullptr; + case CFGElement::MemberDtor: + return nullptr; + case CFGElement::TemporaryDtor: + return Elem->castAs().getBindTemporaryExpr(); + case CFGElement::CleanupFunction: + return nullptr; + } + return nullptr; + } unsigned getCount() const { return Count; } /// It might return null. 
diff --git a/clang/lib/AST/ASTConcept.cpp b/clang/lib/AST/ASTConcept.cpp index c9adccdbc77ef..f7ee0fb3ee92d 100644 --- a/clang/lib/AST/ASTConcept.cpp +++ b/clang/lib/AST/ASTConcept.cpp @@ -40,8 +40,9 @@ ASTConstraintSatisfaction::ASTConstraintSatisfaction( IsSatisfied{Satisfaction.IsSatisfied}, ContainsErrors{ Satisfaction.ContainsErrors} { for (unsigned I = 0; I < NumRecords; ++I) - CreateUnsatisfiedConstraintRecord(C, Satisfaction.Details[I], - getTrailingObjects() + I); + CreateUnsatisfiedConstraintRecord( + C, Satisfaction.Details[I], + getTrailingObjects() + I); } ASTConstraintSatisfaction::ASTConstraintSatisfaction( @@ -50,8 +51,9 @@ ASTConstraintSatisfaction::ASTConstraintSatisfaction( IsSatisfied{Satisfaction.IsSatisfied}, ContainsErrors{Satisfaction.ContainsErrors} { for (unsigned I = 0; I < NumRecords; ++I) - CreateUnsatisfiedConstraintRecord(C, *(Satisfaction.begin() + I), - getTrailingObjects() + I); + CreateUnsatisfiedConstraintRecord( + C, *(Satisfaction.begin() + I), + getTrailingObjects() + I); } ASTConstraintSatisfaction * diff --git a/clang/lib/AST/ASTDiagnostic.cpp b/clang/lib/AST/ASTDiagnostic.cpp index a00d5801f054b..6cb09b0492ac9 100644 --- a/clang/lib/AST/ASTDiagnostic.cpp +++ b/clang/lib/AST/ASTDiagnostic.cpp @@ -20,8 +20,6 @@ #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ConvertUTF.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace clang; @@ -2192,31 +2190,3 @@ static bool FormatTemplateTypeDiff(ASTContext &Context, QualType FromType, TD.DiffTemplate(); return TD.Emit(); } - -std::string clang::FormatUTFCodeUnitAsCodepoint(unsigned Value, QualType T) { - auto IsSingleCodeUnitCP = [](unsigned Value, QualType T) { - if (T->isChar8Type()) { - assert(Value <= 0xFF && "not a valid UTF-8 code unit"); - return Value <= 0x7F; - } - if (T->isChar16Type()) { - assert(Value <= 0xFFFF && "not a valid UTF-16 code unit"); - return 
llvm::IsSingleCodeUnitUTF16Codepoint(Value); - } - assert(T->isChar32Type()); - return llvm::IsSingleCodeUnitUTF32Codepoint(Value); - }; - llvm::SmallVector Str; - if (!IsSingleCodeUnitCP(Value, T)) { - llvm::raw_svector_ostream OS(Str); - OS << "<" << llvm::format_hex(Value, 1, /*Upper=*/true) << ">"; - return std::string(Str.begin(), Str.end()); - } - - char Buffer[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; - char *Ptr = Buffer; - [[maybe_unused]] bool Converted = llvm::ConvertCodePointToUTF8(Value, Ptr); - assert(Converted && "trying to encode invalid code unit"); - EscapeStringForDiagnostic(StringRef(Buffer, Ptr - Buffer), Str); - return std::string(Str.begin(), Str.end()); -} diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 2580fb17ce5e3..67b6ae4a393e9 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -474,6 +474,10 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { return false; return this->emitDecayPtr(*FromT, *ToT, CE); } + + case CK_LValueToRValueBitCast: + return this->emitBuiltinBitCast(CE); + case CK_IntegralToBoolean: case CK_FixedPointToBoolean: { // HLSL uses this to cast to one-element vectors. @@ -731,11 +735,6 @@ bool Compiler::VisitCastExpr(const CastExpr *CE) { llvm_unreachable("Unhandled clang::CastKind enum"); } -template -bool Compiler::VisitBuiltinBitCastExpr(const BuiltinBitCastExpr *E) { - return this->emitBuiltinBitCast(E); -} - template bool Compiler::VisitIntegerLiteral(const IntegerLiteral *LE) { if (DiscardResult) diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index 56a972f452af9..ec5bd637453c5 100644 --- a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -126,7 +126,6 @@ class Compiler : public ConstStmtVisitor, bool>, // Expressions. 
bool VisitCastExpr(const CastExpr *E); - bool VisitBuiltinBitCastExpr(const BuiltinBitCastExpr *E); bool VisitIntegerLiteral(const IntegerLiteral *E); bool VisitFloatingLiteral(const FloatingLiteral *E); bool VisitImaginaryLiteral(const ImaginaryLiteral *E); diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp index c70a5259b77e2..dae94fc9829c7 100644 --- a/clang/lib/AST/ByteCode/Context.cpp +++ b/clang/lib/AST/ByteCode/Context.cpp @@ -37,7 +37,6 @@ bool Context::isPotentialConstantExpr(State &Parent, const FunctionDecl *FD) { Compiler(*this, *P).compileFunc( FD, const_cast(Func)); - ++EvalID; // And run it. if (!Run(Parent, Func)) return false; diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 74efc3c914504..bc860185fea21 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1373,10 +1373,6 @@ static bool checkConstructor(InterpState &S, CodePtr OpPC, const Function *Func, bool CheckDestructor(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { if (!CheckLive(S, OpPC, Ptr, AK_Destroy)) return false; - if (!CheckTemporary(S, OpPC, Ptr, AK_Destroy)) - return false; - if (!CheckRange(S, OpPC, Ptr, AK_Destroy)) - return false; // Can't call a dtor on a global variable. 
if (Ptr.block()->isStatic()) { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 8425e40567b27..061fedb403ddd 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -4325,7 +4325,8 @@ DependentFunctionTemplateSpecializationInfo:: const ASTTemplateArgumentListInfo *TemplateArgsWritten) : NumCandidates(Candidates.size()), TemplateArgumentsAsWritten(TemplateArgsWritten) { - std::transform(Candidates.begin(), Candidates.end(), getTrailingObjects(), + std::transform(Candidates.begin(), Candidates.end(), + getTrailingObjects(), [](NamedDecl *ND) { return cast(ND->getUnderlyingDecl()); }); @@ -5379,7 +5380,7 @@ PragmaCommentDecl *PragmaCommentDecl::Create(const ASTContext &C, PragmaCommentDecl *PCD = new (C, DC, additionalSizeToAlloc(Arg.size() + 1)) PragmaCommentDecl(DC, CommentLoc, CommentKind); - memcpy(PCD->getTrailingObjects(), Arg.data(), Arg.size()); + memcpy(PCD->getTrailingObjects(), Arg.data(), Arg.size()); PCD->getTrailingObjects()[Arg.size()] = '\0'; return PCD; } @@ -5401,10 +5402,11 @@ PragmaDetectMismatchDecl::Create(const ASTContext &C, TranslationUnitDecl *DC, PragmaDetectMismatchDecl *PDMD = new (C, DC, additionalSizeToAlloc(ValueStart + Value.size() + 1)) PragmaDetectMismatchDecl(DC, Loc, ValueStart); - memcpy(PDMD->getTrailingObjects(), Name.data(), Name.size()); - PDMD->getTrailingObjects()[Name.size()] = '\0'; - memcpy(PDMD->getTrailingObjects() + ValueStart, Value.data(), Value.size()); - PDMD->getTrailingObjects()[ValueStart + Value.size()] = '\0'; + memcpy(PDMD->getTrailingObjects(), Name.data(), Name.size()); + PDMD->getTrailingObjects()[Name.size()] = '\0'; + memcpy(PDMD->getTrailingObjects() + ValueStart, Value.data(), + Value.size()); + PDMD->getTrailingObjects()[ValueStart + Value.size()] = '\0'; return PDMD; } @@ -5898,7 +5900,7 @@ ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc, : Decl(Import, DC, StartLoc), ImportedModule(Imported), NextLocalImportAndComplete(nullptr, true) { 
assert(getNumModuleIdentifiers(Imported) == IdentifierLocs.size()); - auto *StoredLocs = getTrailingObjects(); + auto *StoredLocs = getTrailingObjects(); llvm::uninitialized_copy(IdentifierLocs, StoredLocs); } @@ -5906,7 +5908,7 @@ ImportDecl::ImportDecl(DeclContext *DC, SourceLocation StartLoc, Module *Imported, SourceLocation EndLoc) : Decl(Import, DC, StartLoc), ImportedModule(Imported), NextLocalImportAndComplete(nullptr, false) { - *getTrailingObjects() = EndLoc; + *getTrailingObjects() = EndLoc; } ImportDecl *ImportDecl::Create(ASTContext &C, DeclContext *DC, @@ -5937,12 +5939,14 @@ ArrayRef ImportDecl::getIdentifierLocs() const { if (!isImportComplete()) return {}; - return getTrailingObjects(getNumModuleIdentifiers(getImportedModule())); + const auto *StoredLocs = getTrailingObjects(); + return llvm::ArrayRef(StoredLocs, + getNumModuleIdentifiers(getImportedModule())); } SourceRange ImportDecl::getSourceRange() const { if (!isImportComplete()) - return SourceRange(getLocation(), *getTrailingObjects()); + return SourceRange(getLocation(), *getTrailingObjects()); return SourceRange(getLocation(), getIdentifierLocs().back()); } diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index b951e68b0a1b8..6857eef87de38 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -755,7 +755,7 @@ void TemplateTypeParmDecl::setTypeConstraint( "call setTypeConstraint"); assert(!TypeConstraintInitialized && "TypeConstraint was already initialized!"); - new (getTrailingObjects()) + new (getTrailingObjects()) TypeConstraint(Loc, ImmediatelyDeclaredConstraint, ArgPackSubstIndex); TypeConstraintInitialized = true; } @@ -880,7 +880,8 @@ TemplateTemplateParmDecl::TemplateTemplateParmDecl( : TemplateDecl(TemplateTemplateParm, DC, L, Id, Params), TemplateParmPosition(D, P), Typename(Typename), ParameterPack(true), ExpandedParameterPack(true), NumExpandedParams(Expansions.size()) { - llvm::uninitialized_copy(Expansions, 
getTrailingObjects()); + llvm::uninitialized_copy(Expansions, + getTrailingObjects()); } TemplateTemplateParmDecl * @@ -938,7 +939,7 @@ void TemplateTemplateParmDecl::setDefaultArgument( //===----------------------------------------------------------------------===// TemplateArgumentList::TemplateArgumentList(ArrayRef Args) : NumArguments(Args.size()) { - llvm::uninitialized_copy(Args, getTrailingObjects()); + llvm::uninitialized_copy(Args, getTrailingObjects()); } TemplateArgumentList * @@ -1165,7 +1166,7 @@ ImplicitConceptSpecializationDecl::CreateDeserialized( void ImplicitConceptSpecializationDecl::setTemplateArguments( ArrayRef Converted) { assert(Converted.size() == NumTemplateArgs); - llvm::uninitialized_copy(Converted, getTrailingObjects()); + llvm::uninitialized_copy(Converted, getTrailingObjects()); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp index 7283ff837b04e..0c141fc908820 100644 --- a/clang/lib/AST/OpenACCClause.cpp +++ b/clang/lib/AST/OpenACCClause.cpp @@ -114,7 +114,7 @@ OpenACCSelfClause::OpenACCSelfClause(SourceLocation BeginLoc, : OpenACCClauseWithParams(OpenACCClauseKind::Self, BeginLoc, LParenLoc, EndLoc), HasConditionExpr(std::nullopt), NumExprs(VarList.size()) { - llvm::uninitialized_copy(VarList, getTrailingObjects()); + llvm::uninitialized_copy(VarList, getTrailingObjects()); } OpenACCSelfClause::OpenACCSelfClause(SourceLocation BeginLoc, @@ -126,7 +126,8 @@ OpenACCSelfClause::OpenACCSelfClause(SourceLocation BeginLoc, assert((!ConditionExpr || ConditionExpr->isInstantiationDependent() || ConditionExpr->getType()->isScalarType()) && "Condition expression type not scalar/dependent"); - llvm::uninitialized_copy(ArrayRef(ConditionExpr), getTrailingObjects()); + llvm::uninitialized_copy(ArrayRef(ConditionExpr), + getTrailingObjects()); } OpenACCClause::child_range OpenACCClause::children() { diff --git a/clang/lib/AST/Type.cpp 
b/clang/lib/AST/Type.cpp index a20bc3ffba823..82a8cc99cd265 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2193,20 +2193,6 @@ bool Type::isAnyCharacterType() const { } } -bool Type::isUnicodeCharacterType() const { - const auto *BT = dyn_cast(CanonicalType); - if (!BT) - return false; - switch (BT->getKind()) { - default: - return false; - case BuiltinType::Char8: - case BuiltinType::Char16: - case BuiltinType::Char32: - return true; - } -} - /// isSignedIntegerType - Return true if this is an integer type that is /// signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], /// an enum decl which has a signed representation @@ -2847,11 +2833,6 @@ static bool isTriviallyCopyableTypeImpl(const QualType &type, if (CanonicalType->isScalarType() || CanonicalType->isVectorType()) return true; - // Mfloat8 type is a special case as it not scalar, but is still trivially - // copyable. - if (CanonicalType->isMFloat8Type()) - return true; - if (const auto *RT = CanonicalType->getAs()) { if (const auto *ClassDecl = dyn_cast(RT->getDecl())) { if (IsCopyConstructible) { diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 538c1d18a8ac1..fcf86cc4c234c 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -67,12 +67,12 @@ const StreamingDiagnostic &clang::operator<<(const StreamingDiagnostic &DB, return DB; } -static void DummyArgToStringFn(DiagnosticsEngine::ArgumentKind AK, intptr_t QT, - StringRef Modifier, StringRef Argument, - ArrayRef PrevArgs, - SmallVectorImpl &Output, - void *Cookie, - ArrayRef QualTypeVals) { +static void +DummyArgToStringFn(DiagnosticsEngine::ArgumentKind AK, intptr_t QT, + StringRef Modifier, StringRef Argument, + ArrayRef PrevArgs, + SmallVectorImpl &Output, void *Cookie, + ArrayRef QualTypeVals) { StringRef Str = ""; Output.append(Str.begin(), Str.end()); } @@ -94,9 +94,7 @@ DiagnosticsEngine::~DiagnosticsEngine() { setClient(nullptr); } -void 
DiagnosticsEngine::dump() const { - DiagStatesByLoc.dump(*SourceMgr); -} +void DiagnosticsEngine::dump() const { DiagStatesByLoc.dump(*SourceMgr); } void DiagnosticsEngine::dump(StringRef DiagName) const { DiagStatesByLoc.dump(*SourceMgr, DiagName); @@ -259,7 +257,8 @@ void DiagnosticsEngine::DiagStateMap::dump(SourceManager &SrcMgr, bool PrintedOuterHeading = false; auto PrintOuterHeading = [&] { - if (PrintedOuterHeading) return; + if (PrintedOuterHeading) + return; PrintedOuterHeading = true; llvm::errs() << "File " << &File << " "; SrcMgr.getLocForStartOfFile(Decomp.first) - .getLocWithOffset(Decomp.second) - .print(llvm::errs(), SrcMgr); + .getLocWithOffset(Decomp.second) + .print(llvm::errs(), SrcMgr); } if (File.HasLocalTransitions) llvm::errs() << " has_local_transitions"; @@ -286,14 +285,15 @@ void DiagnosticsEngine::DiagStateMap::dump(SourceManager &SrcMgr, for (DiagStatePoint &Transition : File.StateTransitions) { bool PrintedInnerHeading = false; auto PrintInnerHeading = [&] { - if (PrintedInnerHeading) return; + if (PrintedInnerHeading) + return; PrintedInnerHeading = true; PrintOuterHeading(); llvm::errs() << " "; SrcMgr.getLocForStartOfFile(ID) - .getLocWithOffset(Transition.Offset) - .print(llvm::errs(), SrcMgr); + .getLocWithOffset(Transition.Offset) + .print(llvm::errs(), SrcMgr); llvm::errs() << ": state " << Transition.State << ":\n"; }; @@ -316,11 +316,21 @@ void DiagnosticsEngine::DiagStateMap::dump(SourceManager &SrcMgr, llvm::errs() << ": "; switch (Mapping.second.getSeverity()) { - case diag::Severity::Ignored: llvm::errs() << "ignored"; break; - case diag::Severity::Remark: llvm::errs() << "remark"; break; - case diag::Severity::Warning: llvm::errs() << "warning"; break; - case diag::Severity::Error: llvm::errs() << "error"; break; - case diag::Severity::Fatal: llvm::errs() << "fatal"; break; + case diag::Severity::Ignored: + llvm::errs() << "ignored"; + break; + case diag::Severity::Remark: + llvm::errs() << "remark"; + break; + case 
diag::Severity::Warning: + llvm::errs() << "warning"; + break; + case diag::Severity::Error: + llvm::errs() << "error"; + break; + case diag::Severity::Fatal: + llvm::errs() << "fatal"; + break; } if (!Mapping.second.isUser()) @@ -533,16 +543,20 @@ void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) { // Drop the default section introduced by special case list, we only support // exact diagnostic group names. // FIXME: We should make this configurable in the parser instead. - Sections.erase("*"); + // FIXME: C++20 can use std::erase_if(Sections, [](Section &sec) { return + // sec.SectionStr == "*"; }); + Sections.erase( + std::remove_if(Sections.begin(), Sections.end(), + [](Section &sec) { return sec.SectionStr == "*"; }), + Sections.end()); // Make sure we iterate sections by their line numbers. - std::vector *>> - LineAndSectionEntry; + std::vector> LineAndSectionEntry; LineAndSectionEntry.reserve(Sections.size()); for (const auto &Entry : Sections) { - StringRef DiagName = Entry.getKey(); + StringRef DiagName = Entry.SectionStr; // Each section has a matcher with that section's name, attached to that // line. 
- const auto &DiagSectionMatcher = Entry.getValue().SectionMatcher; + const auto &DiagSectionMatcher = Entry.SectionMatcher; unsigned DiagLine = DiagSectionMatcher->Globs.at(DiagName).second; LineAndSectionEntry.emplace_back(DiagLine, &Entry); } @@ -550,7 +564,7 @@ void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) { static constexpr auto WarningFlavor = clang::diag::Flavor::WarningOrError; for (const auto &[_, SectionEntry] : LineAndSectionEntry) { SmallVector GroupDiags; - StringRef DiagGroup = SectionEntry->getKey(); + StringRef DiagGroup = SectionEntry->SectionStr; if (Diags.getDiagnosticIDs()->getDiagnosticsInGroup( WarningFlavor, DiagGroup, GroupDiags)) { StringRef Suggestion = @@ -563,7 +577,7 @@ void WarningsSpecialCaseList::processSections(DiagnosticsEngine &Diags) { for (diag::kind Diag : GroupDiags) // We're intentionally overwriting any previous mappings here to make sure // latest one takes precedence. - DiagToSection[Diag] = &SectionEntry->getValue(); + DiagToSection[Diag] = SectionEntry; } } @@ -662,8 +676,8 @@ bool DiagnosticsEngine::EmitDiagnostic(const DiagnosticBuilder &DB, Diagnostic Info(this, DB); // Figure out the diagnostic level of this message. 
- DiagnosticIDs::Level DiagLevel - = Diags->getDiagnosticLevel(Info.getID(), Info.getLocation(), *this); + DiagnosticIDs::Level DiagLevel = + Diags->getDiagnosticLevel(Info.getID(), Info.getLocation(), *this); Emitted = (DiagLevel != DiagnosticIDs::Ignored); if (Emitted) { @@ -715,7 +729,7 @@ Diagnostic::Diagnostic(const DiagnosticsEngine *DO, SourceLocation DiagLoc, DiagnosticConsumer::~DiagnosticConsumer() = default; void DiagnosticConsumer::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, - const Diagnostic &Info) { + const Diagnostic &Info) { if (!IncludeInDiagnosticCounts()) return; @@ -729,7 +743,7 @@ void DiagnosticConsumer::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, template static bool ModifierIs(const char *Modifier, unsigned ModifierLen, const char (&Str)[StrLen]) { - return StrLen-1 == ModifierLen && memcmp(Modifier, Str, StrLen-1) == 0; + return StrLen - 1 == ModifierLen && memcmp(Modifier, Str, StrLen - 1) == 0; } /// ScanForward - Scans forward, looking for the given character, skipping @@ -737,20 +751,25 @@ static bool ModifierIs(const char *Modifier, unsigned ModifierLen, static const char *ScanFormat(const char *I, const char *E, char Target) { unsigned Depth = 0; - for ( ; I != E; ++I) { - if (Depth == 0 && *I == Target) return I; - if (Depth != 0 && *I == '}') Depth--; + for (; I != E; ++I) { + if (Depth == 0 && *I == Target) + return I; + if (Depth != 0 && *I == '}') + Depth--; if (*I == '%') { I++; - if (I == E) break; + if (I == E) + break; // Escaped characters get implicitly skipped here. // Format specifier. 
if (!isDigit(*I) && !isPunctuation(*I)) { - for (I++; I != E && !isDigit(*I) && *I != '{'; I++) ; - if (I == E) break; + for (I++; I != E && !isDigit(*I) && *I != '{'; I++) + ; + if (I == E) + break; if (*I == '{') Depth++; } @@ -767,14 +786,15 @@ static const char *ScanFormat(const char *I, const char *E, char Target) { static void HandleSelectModifier(const Diagnostic &DInfo, unsigned ValNo, const char *Argument, unsigned ArgumentLen, SmallVectorImpl &OutStr) { - const char *ArgumentEnd = Argument+ArgumentLen; + const char *ArgumentEnd = Argument + ArgumentLen; // Skip over 'ValNo' |'s. while (ValNo) { const char *NextVal = ScanFormat(Argument, ArgumentEnd, '|'); - assert(NextVal != ArgumentEnd && "Value for integer select modifier was" + assert(NextVal != ArgumentEnd && + "Value for integer select modifier was" " larger than the number of options in the diagnostic string!"); - Argument = NextVal+1; // Skip this string. + Argument = NextVal + 1; // Skip this string. --ValNo; } @@ -973,15 +993,13 @@ static const char *getTokenDescForDiagnostic(tok::TokenKind Kind) { /// FormatDiagnostic - Format this diagnostic into a string, substituting the /// formal arguments into the %0 slots. The result is appended onto the Str /// array. 
-void Diagnostic:: -FormatDiagnostic(SmallVectorImpl &OutStr) const { +void Diagnostic::FormatDiagnostic(SmallVectorImpl &OutStr) const { if (StoredDiagMessage.has_value()) { OutStr.append(StoredDiagMessage->begin(), StoredDiagMessage->end()); return; } - StringRef Diag = - getDiags()->getDiagnosticIDs()->getDescription(getID()); + StringRef Diag = getDiags()->getDiagnosticIDs()->getDescription(getID()); FormatDiagnostic(Diag.begin(), Diag.end(), OutStr); } @@ -1031,9 +1049,8 @@ void clang::EscapeStringForDiagnostic(StringRef Str, } } -void Diagnostic:: -FormatDiagnostic(const char *DiagStr, const char *DiagEnd, - SmallVectorImpl &OutStr) const { +void Diagnostic::FormatDiagnostic(const char *DiagStr, const char *DiagEnd, + SmallVectorImpl &OutStr) const { // When the diagnostic string is only "%0", the entire string is being given // by an outside source. Remove unprintable characters from this string // and skip all the other string processing. @@ -1067,7 +1084,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, DiagStr = StrEnd; continue; } else if (isPunctuation(DiagStr[1])) { - OutStr.push_back(DiagStr[1]); // %% -> %. + OutStr.push_back(DiagStr[1]); // %% -> %. DiagStr += 2; continue; } @@ -1086,10 +1103,9 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, // Check to see if we have a modifier. If so eat it. if (!isDigit(DiagStr[0])) { Modifier = DiagStr; - while (DiagStr[0] == '-' || - (DiagStr[0] >= 'a' && DiagStr[0] <= 'z')) + while (DiagStr[0] == '-' || (DiagStr[0] >= 'a' && DiagStr[0] <= 'z')) ++DiagStr; - ModifierLen = DiagStr-Modifier; + ModifierLen = DiagStr - Modifier; // If we have an argument, get it next. if (DiagStr[0] == '{') { @@ -1098,8 +1114,8 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, DiagStr = ScanFormat(DiagStr, DiagEnd, '}'); assert(DiagStr != DiagEnd && "Mismatched {}'s in diagnostic string!"); - ArgumentLen = DiagStr-Argument; - ++DiagStr; // Skip }. 
+ ArgumentLen = DiagStr - Argument; + ++DiagStr; // Skip }. } } @@ -1113,7 +1129,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, if (ModifierIs(Modifier, ModifierLen, "diff")) { assert(*DiagStr == ',' && isDigit(*(DiagStr + 1)) && "Invalid format for diff modifier"); - ++DiagStr; // Comma. + ++DiagStr; // Comma. ArgNo2 = *DiagStr++ - '0'; DiagnosticsEngine::ArgumentKind Kind2 = getArgKind(ArgNo2); if (Kind == DiagnosticsEngine::ak_qualtype && @@ -1131,8 +1147,8 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, "Found too many '|'s in a %diff modifier!"); const char *FirstDollar = ScanFormat(Argument, Pipe, '$'); const char *SecondDollar = ScanFormat(FirstDollar + 1, Pipe, '$'); - const char ArgStr1[] = { '%', static_cast('0' + ArgNo) }; - const char ArgStr2[] = { '%', static_cast('0' + ArgNo2) }; + const char ArgStr1[] = {'%', static_cast('0' + ArgNo)}; + const char ArgStr2[] = {'%', static_cast('0' + ArgNo2)}; FormatDiagnostic(Argument, FirstDollar, OutStr); FormatDiagnostic(ArgStr1, ArgStr1 + 2, OutStr); FormatDiagnostic(FirstDollar + 1, SecondDollar, OutStr); @@ -1256,8 +1272,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, getDiags()->ConvertArgToString(Kind, getRawArg(ArgNo), StringRef(Modifier, ModifierLen), StringRef(Argument, ArgumentLen), - FormattedArgs, - OutStr, QualTypeVals); + FormattedArgs, OutStr, QualTypeVals); break; case DiagnosticsEngine::ak_qualtype_pair: { // Create a struct with all the info needed for printing. @@ -1280,8 +1295,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, getDiags()->ConvertArgToString(Kind, val, StringRef(Modifier, ModifierLen), StringRef(Argument, ArgumentLen), - FormattedArgs, - Tree, QualTypeVals); + FormattedArgs, Tree, QualTypeVals); // If there is no tree information, fall back to regular printing. 
if (!Tree.empty()) { FormatDiagnostic(Pipe + 1, ArgumentEnd, OutStr); @@ -1303,11 +1317,10 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, getDiags()->ConvertArgToString(Kind, val, StringRef(Modifier, ModifierLen), StringRef(Argument, ArgumentLen), - FormattedArgs, - OutStr, QualTypeVals); + FormattedArgs, OutStr, QualTypeVals); if (!TDT.TemplateDiffUsed) - FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype, - TDT.FromType)); + FormattedArgs.push_back( + std::make_pair(DiagnosticsEngine::ak_qualtype, TDT.FromType)); // Append middle text FormatDiagnostic(FirstDollar + 1, SecondDollar, OutStr); @@ -1317,11 +1330,10 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, getDiags()->ConvertArgToString(Kind, val, StringRef(Modifier, ModifierLen), StringRef(Argument, ArgumentLen), - FormattedArgs, - OutStr, QualTypeVals); + FormattedArgs, OutStr, QualTypeVals); if (!TDT.TemplateDiffUsed) - FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype, - TDT.ToType)); + FormattedArgs.push_back( + std::make_pair(DiagnosticsEngine::ak_qualtype, TDT.ToType)); // Append end text FormatDiagnostic(SecondDollar + 1, Pipe, OutStr); @@ -1337,8 +1349,9 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, else if (Kind != DiagnosticsEngine::ak_std_string) FormattedArgs.push_back(std::make_pair(Kind, getRawArg(ArgNo))); else - FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_c_string, - (intptr_t)getArgStdStr(ArgNo).c_str())); + FormattedArgs.push_back( + std::make_pair(DiagnosticsEngine::ak_c_string, + (intptr_t)getArgStdStr(ArgNo).c_str())); } // Append the type tree to the end of the diagnostics. 
@@ -1352,8 +1365,9 @@ StoredDiagnostic::StoredDiagnostic(DiagnosticsEngine::Level Level, unsigned ID, StoredDiagnostic::StoredDiagnostic(DiagnosticsEngine::Level Level, const Diagnostic &Info) : ID(Info.getID()), Level(Level) { - assert((Info.getLocation().isInvalid() || Info.hasSourceManager()) && - "Valid source location without setting a source manager for diagnostic"); + assert( + (Info.getLocation().isInvalid() || Info.hasSourceManager()) && + "Valid source location without setting a source manager for diagnostic"); if (Info.getLocation().isValid()) Loc = FullSourceLoc(Info.getLocation(), Info.getSourceManager()); SmallString<64> Message; @@ -1368,9 +1382,8 @@ StoredDiagnostic::StoredDiagnostic(DiagnosticsEngine::Level Level, unsigned ID, ArrayRef Ranges, ArrayRef FixIts) : ID(ID), Level(Level), Loc(Loc), Message(Message), - Ranges(Ranges.begin(), Ranges.end()), FixIts(FixIts.begin(), FixIts.end()) -{ -} + Ranges(Ranges.begin(), Ranges.end()), + FixIts(FixIts.begin(), FixIts.end()) {} llvm::raw_ostream &clang::operator<<(llvm::raw_ostream &OS, const StoredDiagnostic &SD) { @@ -1391,8 +1404,7 @@ void IgnoringDiagConsumer::anchor() {} ForwardingDiagnosticConsumer::~ForwardingDiagnosticConsumer() = default; void ForwardingDiagnosticConsumer::HandleDiagnostic( - DiagnosticsEngine::Level DiagLevel, - const Diagnostic &Info) { + DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) { Target.HandleDiagnostic(DiagLevel, Info); } diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp index 01b8d7a073432..f2383c76853ec 100644 --- a/clang/lib/Basic/ProfileList.cpp +++ b/clang/lib/Basic/ProfileList.cpp @@ -37,7 +37,7 @@ class ProfileSpecialCaseList : public llvm::SpecialCaseList { bool hasPrefix(StringRef Prefix) const { for (const auto &It : Sections) - if (It.second.Entries.count(Prefix) > 0) + if (It.Entries.count(Prefix) > 0) return true; return false; } diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp 
b/clang/lib/Basic/SanitizerSpecialCaseList.cpp index b02e868cdaa44..8af46c74535ae 100644 --- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp +++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp @@ -38,11 +38,10 @@ SanitizerSpecialCaseList::createOrDie(const std::vector &Paths, void SanitizerSpecialCaseList::createSanitizerSections() { for (auto &It : Sections) { - auto &S = It.second; SanitizerMask Mask; #define SANITIZER(NAME, ID) \ - if (S.SectionMatcher->match(NAME)) \ + if (It.SectionMatcher->match(NAME)) \ Mask |= SanitizerKind::ID; #define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID) @@ -50,7 +49,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() { #undef SANITIZER #undef SANITIZER_GROUP - SanitizerSections.emplace_back(Mask, S.Entries); + SanitizerSections.emplace_back(Mask, It.Entries); } } diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index e6ef0ecc526ba..425ad68bb9098 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -679,17 +679,11 @@ bool PPCTargetInfo::initFeatureMap( } } - if (llvm::is_contained(FeaturesVec, "+rop-protect")) { - if (PointerWidth == 32) { - Diags.Report(diag::err_opt_not_valid_on_target) << "-mrop-protect"; - return false; - } - - if (!(ArchDefs & ArchDefinePwr8)) { - // We can turn on ROP Protect on Power 8 and above. - Diags.Report(diag::err_opt_not_valid_with_opt) << "-mrop-protect" << CPU; - return false; - } + if (!(ArchDefs & ArchDefinePwr8) && + llvm::is_contained(FeaturesVec, "+rop-protect")) { + // We can turn on ROP Protect on Power 8 and above. 
+ Diags.Report(diag::err_opt_not_valid_with_opt) << "-mrop-protect" << CPU; + return false; } if (!(ArchDefs & ArchDefinePwr8) && diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 48cfbda12b2ac..4fdf2113cb9dc 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6115,9 +6115,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_thread_pointer: { if (!getContext().getTargetInfo().isTLSSupported()) CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); - - return RValue::get(Builder.CreateIntrinsic(llvm::Intrinsic::thread_pointer, - {GlobalsInt8PtrTy}, {})); + // Fall through - it's already mapped to the intrinsic by ClangBuiltin. + break; } case Builtin::BI__builtin_os_log_format: return emitBuiltinOSLogFormat(*E); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index bcd579454413e..aa1909443e8cd 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1366,23 +1366,19 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, // If we are casting a fixed i8 vector to a scalable i1 predicate // vector, use a vector insert and bitcast the result. 
if (ScalableDstTy->getElementType()->isIntegerTy(1) && + ScalableDstTy->getElementCount().isKnownMultipleOf(8) && FixedSrcTy->getElementType()->isIntegerTy(8)) { ScalableDstTy = llvm::ScalableVectorType::get( FixedSrcTy->getElementType(), - llvm::divideCeil( - ScalableDstTy->getElementCount().getKnownMinValue(), 8)); + ScalableDstTy->getElementCount().getKnownMinValue() / 8); } if (ScalableDstTy->getElementType() == FixedSrcTy->getElementType()) { auto *Load = CGF.Builder.CreateLoad(Src); auto *PoisonVec = llvm::PoisonValue::get(ScalableDstTy); llvm::Value *Result = CGF.Builder.CreateInsertVector( ScalableDstTy, PoisonVec, Load, uint64_t(0), "cast.scalable"); - ScalableDstTy = cast( - llvm::VectorType::getWithSizeAndScalar(ScalableDstTy, Ty)); - if (Result->getType() != ScalableDstTy) - Result = CGF.Builder.CreateBitCast(Result, ScalableDstTy); - if (Result->getType() != Ty) - Result = CGF.Builder.CreateExtractVector(Ty, Result, uint64_t(0)); + if (ScalableDstTy != Ty) + Result = CGF.Builder.CreateBitCast(Result, Ty); return Result; } } @@ -1480,14 +1476,8 @@ CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy, // If we are casting a scalable i1 predicate vector to a fixed i8 // vector, first bitcast the source. 
if (FromTy->getElementType()->isIntegerTy(1) && + FromTy->getElementCount().isKnownMultipleOf(8) && ToTy->getElementType() == CGF.Builder.getInt8Ty()) { - if (!FromTy->getElementCount().isKnownMultipleOf(8)) { - FromTy = llvm::ScalableVectorType::get( - FromTy->getElementType(), - llvm::alignTo<8>(FromTy->getElementCount().getKnownMinValue())); - llvm::Value *ZeroVec = llvm::Constant::getNullValue(FromTy); - V = CGF.Builder.CreateInsertVector(FromTy, ZeroVec, V, uint64_t(0)); - } FromTy = llvm::ScalableVectorType::get( ToTy->getElementType(), FromTy->getElementCount().getKnownMinValue() / 8); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 37a5678aa61d5..ec01c87c13b1d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1226,8 +1226,16 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, SanitizerScope SanScope(this); + llvm::DILocation *CheckDI = Builder.getCurrentDebugLocation(); auto CheckKind = SanitizerKind::SO_ArrayBounds; - ApplyDebugLocation ApplyTrapDI(*this, SanitizerAnnotateDebugInfo(CheckKind)); + // TODO: deprecate ClArrayBoundsPseudoFn + if ((ClArrayBoundsPseudoFn || + CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo.has(CheckKind)) && + CheckDI) { + CheckDI = getDebugInfo()->CreateSyntheticInlineAt( + Builder.getCurrentDebugLocation(), "__ubsan_check_array_bounds"); + } + ApplyDebugLocation ApplyTrapDI(*this, CheckDI); bool IndexSigned = IndexType->isSignedIntegerOrEnumerationType(); llvm::Value *IndexVal = Builder.CreateIntCast(Index, SizeTy, IndexSigned); @@ -1244,35 +1252,6 @@ void CodeGenFunction::EmitBoundsCheckImpl(const Expr *E, llvm::Value *Bound, StaticData, Index); } -llvm::DILocation *CodeGenFunction::SanitizerAnnotateDebugInfo( - SanitizerKind::SanitizerOrdinal CheckKindOrdinal) { - std::string Label; - switch (CheckKindOrdinal) { -#define SANITIZER(NAME, ID) \ - case SanitizerKind::SO_##ID: \ - Label = "__ubsan_check_" NAME; \ - break; -#include 
"clang/Basic/Sanitizers.def" - default: - llvm_unreachable("unexpected sanitizer kind"); - } - - // Sanitize label - for (unsigned int i = 0; i < Label.length(); i++) - if (!std::isalpha(Label[i])) - Label[i] = '_'; - - llvm::DILocation *CheckDI = Builder.getCurrentDebugLocation(); - // TODO: deprecate ClArrayBoundsPseudoFn - if (((ClArrayBoundsPseudoFn && - CheckKindOrdinal == SanitizerKind::SO_ArrayBounds) || - CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo.has(CheckKindOrdinal)) && - CheckDI) - CheckDI = getDebugInfo()->CreateSyntheticInlineAt(CheckDI, Label); - - return CheckDI; -} - CodeGenFunction::ComplexPairTy CodeGenFunction:: EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV, bool isInc, bool isPre) { diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 5d618658bc615..7fe3a1660326b 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2491,22 +2491,18 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // If we are casting a fixed i8 vector to a scalable i1 predicate // vector, use a vector insert and bitcast the result. 
if (ScalableDstTy->getElementType()->isIntegerTy(1) && + ScalableDstTy->getElementCount().isKnownMultipleOf(8) && FixedSrcTy->getElementType()->isIntegerTy(8)) { ScalableDstTy = llvm::ScalableVectorType::get( FixedSrcTy->getElementType(), - llvm::divideCeil( - ScalableDstTy->getElementCount().getKnownMinValue(), 8)); + ScalableDstTy->getElementCount().getKnownMinValue() / 8); } if (FixedSrcTy->getElementType() == ScalableDstTy->getElementType()) { llvm::Value *PoisonVec = llvm::PoisonValue::get(ScalableDstTy); llvm::Value *Result = Builder.CreateInsertVector( ScalableDstTy, PoisonVec, Src, uint64_t(0), "cast.scalable"); - ScalableDstTy = cast( - llvm::VectorType::getWithSizeAndScalar(ScalableDstTy, DstTy)); - if (Result->getType() != ScalableDstTy) - Result = Builder.CreateBitCast(Result, ScalableDstTy); if (Result->getType() != DstTy) - Result = Builder.CreateExtractVector(DstTy, Result, uint64_t(0)); + Result = Builder.CreateBitCast(Result, DstTy); return Result; } } @@ -2520,17 +2516,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // If we are casting a scalable i1 predicate vector to a fixed i8 // vector, bitcast the source and use a vector extract. 
if (ScalableSrcTy->getElementType()->isIntegerTy(1) && + ScalableSrcTy->getElementCount().isKnownMultipleOf(8) && FixedDstTy->getElementType()->isIntegerTy(8)) { - if (!ScalableSrcTy->getElementCount().isKnownMultipleOf(8)) { - ScalableSrcTy = llvm::ScalableVectorType::get( - ScalableSrcTy->getElementType(), - llvm::alignTo<8>( - ScalableSrcTy->getElementCount().getKnownMinValue())); - llvm::Value *ZeroVec = llvm::Constant::getNullValue(ScalableSrcTy); - Src = Builder.CreateInsertVector(ScalableSrcTy, ZeroVec, Src, - uint64_t(0)); - } - ScalableSrcTy = llvm::ScalableVectorType::get( FixedDstTy->getElementType(), ScalableSrcTy->getElementCount().getKnownMinValue() / 8); diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index d4a0714da07b3..5d93df34c66b2 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -303,21 +303,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, HandleTy, CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic(), ArrayRef{SpaceOp, RegisterOp, RangeOp, IndexOp, NonUniform}); } - case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: { - llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType()); - Value *SpaceOp = EmitScalarExpr(E->getArg(1)); - Value *RangeOp = EmitScalarExpr(E->getArg(2)); - Value *IndexOp = EmitScalarExpr(E->getArg(3)); - Value *OrderID = EmitScalarExpr(E->getArg(4)); - // FIXME: NonUniformResourceIndex bit is not yet implemented - // (llvm/llvm-project#135452) - Value *NonUniform = - llvm::ConstantInt::get(llvm::Type::getInt1Ty(getLLVMContext()), false); - return Builder.CreateIntrinsic( - HandleTy, - CGM.getHLSLRuntime().getCreateHandleFromImplicitBindingIntrinsic(), - ArrayRef{OrderID, SpaceOp, RangeOp, IndexOp, NonUniform}); - } case Builtin::BI__builtin_hlsl_all: { Value *Op0 = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h 
b/clang/lib/CodeGen/CGHLSLRuntime.h index e40864d8ed854..4d6db3f5d9f3e 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -119,8 +119,6 @@ class CGHLSLRuntime { resource_getpointer) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, resource_handlefrombinding) - GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding, - resource_handlefromimplicitbinding) GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter) GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync, group_memory_barrier_with_group_sync) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 7104303cba50e..fa4ceafc41893 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2816,11 +2816,6 @@ class CodeGenFunction : public CodeGenTypeCache { void emitStoresForInitAfterBZero(llvm::Constant *Init, Address Loc, bool isVolatile, bool IsAutoInit); - /// Returns debug info, with additional annotation if enabled by - /// CGM.getCodeGenOpts().SanitizeAnnotateDebugInfo[CheckKindOrdinal]. - llvm::DILocation * - SanitizerAnnotateDebugInfo(SanitizerKind::SanitizerOrdinal CheckKindOrdinal); - public: // Captures all the allocas created during the scope of its RAII object. struct AllocaTrackerRAII { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 16e010adbeb5f..50041f883cfe5 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1689,7 +1689,7 @@ static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, const llvm::Triple &TT = CGM.getTriple(); const auto &CGOpts = CGM.getCodeGenOpts(); - if (TT.isOSCygMing()) { + if (TT.isWindowsGNUEnvironment()) { // In MinGW, variables without DLLImport can still be automatically // imported from a DLL by the linker; don't mark variables that // potentially could come from another DLL as DSO local. 
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 843733ba6604d..d1b292f23c2d2 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -108,6 +108,9 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { MT->getNumRows() * MT->getNumColumns()); } + if (T->isMFloat8Type()) + return llvm::Type::getInt8Ty(getLLVMContext()); + llvm::Type *R = ConvertType(T); // Check for the boolean vector case. diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 8826085c596da..70b53be7e77a3 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3754,7 +3754,7 @@ static bool ShouldUseExternalRTTIDescriptor(CodeGenModule &CGM, bool IsDLLImport = RD->hasAttr(); // Don't import the RTTI but emit it locally. - if (CGM.getTriple().isOSCygMing()) + if (CGM.getTriple().isWindowsGNUEnvironment()) return false; if (CGM.getVTables().isVTableExternal(RD)) { @@ -4041,7 +4041,10 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, return llvm::GlobalValue::ExternalLinkage; // MinGW always uses LinkOnceODRLinkage for type info. 
if (RD->isDynamicClass() && - !CGM.getContext().getTargetInfo().getTriple().isOSCygMing()) + !CGM.getContext() + .getTargetInfo() + .getTriple() + .isWindowsGNUEnvironment()) return CGM.getVTableLinkage(RD); } diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 1cf8f6819b75a..d37e68508373c 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -2624,26 +2624,22 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) { case NEON::BI__builtin_neon_vget_lane_bf16: case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vget_lane_i64: - case NEON::BI__builtin_neon_vget_lane_mf8: case NEON::BI__builtin_neon_vget_lane_f32: case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vgetq_lane_i16: case NEON::BI__builtin_neon_vgetq_lane_bf16: case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vgetq_lane_i64: - case NEON::BI__builtin_neon_vgetq_lane_mf8: case NEON::BI__builtin_neon_vgetq_lane_f32: case NEON::BI__builtin_neon_vduph_lane_bf16: case NEON::BI__builtin_neon_vduph_laneq_bf16: case NEON::BI__builtin_neon_vset_lane_i8: - case NEON::BI__builtin_neon_vset_lane_mf8: case NEON::BI__builtin_neon_vset_lane_i16: case NEON::BI__builtin_neon_vset_lane_bf16: case NEON::BI__builtin_neon_vset_lane_i32: case NEON::BI__builtin_neon_vset_lane_i64: case NEON::BI__builtin_neon_vset_lane_f32: case NEON::BI__builtin_neon_vsetq_lane_i8: - case NEON::BI__builtin_neon_vsetq_lane_mf8: case NEON::BI__builtin_neon_vsetq_lane_i16: case NEON::BI__builtin_neon_vsetq_lane_bf16: case NEON::BI__builtin_neon_vsetq_lane_i32: @@ -4183,17 +4179,9 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, unsigned IntrinsicID, bool IsZExtReturn) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); - // Mfloat8 
types is stored as a vector, so extra work - // to extract sclar element type is necessary. - if (MemEltTy->isVectorTy()) { - assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) && - "Only <1 x i8> expected"); - MemEltTy = cast(MemEltTy)->getElementType(); - } - // The vector type that is returned may be different from the // eventual type loaded from memory. auto VectorTy = cast(ReturnTy); @@ -4238,17 +4226,9 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, SmallVectorImpl &Ops, unsigned IntrinsicID) { QualType LangPTy = E->getArg(1)->getType(); - llvm::Type *MemEltTy = CGM.getTypes().ConvertType( + llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem( LangPTy->castAs()->getPointeeType()); - // Mfloat8 types is stored as a vector, so extra work - // to extract sclar element type is necessary. - if (MemEltTy->isVectorTy()) { - assert(MemEltTy == FixedVectorType::get(Int8Ty, 1) && - "Only <1 x i8> expected"); - MemEltTy = cast(MemEltTy)->getElementType(); - } - // The vector type that is stored may be different from the // eventual type stored to memory. auto VectorTy = cast(Ops.back()->getType()); @@ -6182,13 +6162,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1)); Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); - case NEON::BI__builtin_neon_vset_lane_mf8: - case NEON::BI__builtin_neon_vsetq_lane_mf8: - Ops.push_back(EmitScalarExpr(E->getArg(2))); - // The input vector type needs a cast to scalar type. - Ops[0] = - Builder.CreateBitCast(Ops[0], llvm::Type::getInt8Ty(getLLVMContext())); - return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vsetq_lane_f64: // The vector type needs a cast for the v2f64 variant. 
Ops[1] = @@ -6208,12 +6181,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); - case NEON::BI__builtin_neon_vget_lane_mf8: - case NEON::BI__builtin_neon_vdupb_lane_mf8: - case NEON::BI__builtin_neon_vgetq_lane_mf8: - case NEON::BI__builtin_neon_vdupb_laneq_mf8: - return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), - "vget_lane"); case NEON::BI__builtin_neon_vget_lane_i16: case NEON::BI__builtin_neon_vduph_lane_i16: Ops[0] = @@ -7663,7 +7630,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } - case NEON::BI__builtin_neon_vluti2_laneq_mf8: case NEON::BI__builtin_neon_vluti2_laneq_bf16: case NEON::BI__builtin_neon_vluti2_laneq_f16: case NEON::BI__builtin_neon_vluti2_laneq_p16: @@ -7679,7 +7645,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, /*isQuad*/ false)); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq"); } - case NEON::BI__builtin_neon_vluti2q_laneq_mf8: case NEON::BI__builtin_neon_vluti2q_laneq_bf16: case NEON::BI__builtin_neon_vluti2q_laneq_f16: case NEON::BI__builtin_neon_vluti2q_laneq_p16: @@ -7695,7 +7660,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, /*isQuad*/ true)); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_laneq"); } - case NEON::BI__builtin_neon_vluti2_lane_mf8: case NEON::BI__builtin_neon_vluti2_lane_bf16: case NEON::BI__builtin_neon_vluti2_lane_f16: case NEON::BI__builtin_neon_vluti2_lane_p16: @@ -7711,7 +7675,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, /*isQuad*/ false)); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane"); } - case NEON::BI__builtin_neon_vluti2q_lane_mf8: case NEON::BI__builtin_neon_vluti2q_lane_bf16: case 
NEON::BI__builtin_neon_vluti2q_lane_f16: case NEON::BI__builtin_neon_vluti2q_lane_p16: @@ -7727,14 +7690,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, /*isQuad*/ true)); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vluti2_lane"); } - case NEON::BI__builtin_neon_vluti4q_lane_mf8: case NEON::BI__builtin_neon_vluti4q_lane_p8: case NEON::BI__builtin_neon_vluti4q_lane_s8: case NEON::BI__builtin_neon_vluti4q_lane_u8: { Int = Intrinsic::aarch64_neon_vluti4q_lane; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_lane"); } - case NEON::BI__builtin_neon_vluti4q_laneq_mf8: case NEON::BI__builtin_neon_vluti4q_laneq_p8: case NEON::BI__builtin_neon_vluti4q_laneq_s8: case NEON::BI__builtin_neon_vluti4q_laneq_u8: { diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index eb4718909c951..85c4a754f93c5 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -160,10 +160,6 @@ static std::string describeSanitizeArg(const llvm::opt::Arg *A, /// Sanitizers set. static std::string toString(const clang::SanitizerSet &Sanitizers); -/// Produce a string containing comma-separated names of sanitizers and -/// sanitizer groups in \p Sanitizers set. -static std::string toStringWithGroups(const clang::SanitizerSet &Sanitizers); - /// Return true if an execute-only target disallows data access to code /// sections. 
static bool isExecuteOnlyTarget(const llvm::Triple &Triple, @@ -293,7 +289,7 @@ parseSanitizeArgs(const Driver &D, const llvm::opt::ArgList &Args, SanitizerSet SetToDiagnose; SetToDiagnose.Mask |= KindsToDiagnose; D.Diag(diag::err_drv_unsupported_option_argument) - << Arg->getSpelling() << toStringWithGroups(SetToDiagnose); + << Arg->getSpelling() << toString(SetToDiagnose); DiagnosedAlwaysOutViolations |= KindsToDiagnose; } } @@ -309,7 +305,7 @@ parseSanitizeArgs(const Driver &D, const llvm::opt::ArgList &Args, SanitizerSet SetToDiagnose; SetToDiagnose.Mask |= KindsToDiagnose; D.Diag(diag::err_drv_unsupported_option_argument) - << Arg->getSpelling() << toStringWithGroups(SetToDiagnose); + << Arg->getSpelling() << toString(SetToDiagnose); DiagnosedAlwaysInViolations |= KindsToDiagnose; } } @@ -1204,19 +1200,6 @@ static std::string toString(const clang::SanitizerMaskCutoffs &Cutoffs) { return llvm::join(Res, ","); } -static std::string toStringWithGroups(const clang::SanitizerSet &Sanitizers) { - std::string Res; -#define SANITIZER(NAME, ID) \ - if (Sanitizers.has(SanitizerKind::ID)) { \ - if (!Res.empty()) \ - Res += ","; \ - Res += NAME; \ - } -#define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID##Group) -#include "clang/Basic/Sanitizers.def" - return Res; -} - static void addSpecialCaseListOpt(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, const char *SCLOptFlag, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index fd48e425a5c21..394512978b521 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3112,11 +3112,9 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, if (const Arg *A = Args.getLastArg(OPT_code_completion_at)) { Opts.CodeCompletionAt = ParsedSourceLocation::FromString(A->getValue()); - if (Opts.CodeCompletionAt.FileName.empty()) { + if (Opts.CodeCompletionAt.FileName.empty()) 
Diags.Report(diag::err_drv_invalid_value) - << A->getAsString(Args) << A->getValue(); - Diags.Report(diag::note_command_line_code_loc_requirement); - } + << A->getAsString(Args) << A->getValue(); } Opts.Plugins = Args.getAllArgValues(OPT_load); diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h index 8dbde4291fff5..acb46c81db58a 100644 --- a/clang/lib/Headers/__clang_hip_cmath.h +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -464,13 +464,12 @@ class __promote : public __promote_imp<_A1, _A2, _A3> {}; #if __cplusplus >= 201103L #define __HIP_OVERLOAD2(__retty, __fn) \ template \ - __DEVICE__ __CONSTEXPR__ \ - typename __hip_enable_if<__hip::is_arithmetic<__T1>::value && \ - __hip::is_arithmetic<__T2>::value, \ - __retty>::type \ - __fn(__T1 __x, __T2 __y) { \ - typedef typename __hip::__promote<__T1, __T2>::type __arg_type; \ - return __fn((__arg_type)__x, (__arg_type)__y); \ + __DEVICE__ __CONSTEXPR__ typename __hip_enable_if< \ + __hip::is_arithmetic<__T1>::value && __hip::is_arithmetic<__T2>::value, \ + typename __hip::__promote<__T1, __T2>::type>::type \ + __fn(__T1 __x, __T2 __y) { \ + typedef typename __hip::__promote<__T1, __T2>::type __result_type; \ + return __fn((__result_type)__x, (__result_type)__y); \ } #else #define __HIP_OVERLOAD2(__retty, __fn) \ diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index cfffcdb01a514..d7840d97e8d9b 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2613,8 +2613,9 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( Diag(Tok, diag::err_omp_unknown_directive); return StmtError(); } - if (!(getDirectiveLanguages(DKind) & SourceLanguage::C)) { - // Treat directives that are not allowed in C/C++ as unknown. + if (DKind == OMPD_workshare) { + // "workshare" is an executable, Fortran-only directive. Treat it + // as unknown. 
DKind = OMPD_unknown; } diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp index 7a561638aebbc..35364a4d6f2ac 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.cpp @@ -668,26 +668,6 @@ BuiltinTypeDeclBuilder::addHandleConstructorFromBinding() { .finalize(); } -BuiltinTypeDeclBuilder & -BuiltinTypeDeclBuilder::addHandleConstructorFromImplicitBinding() { - if (Record->isCompleteDefinition()) - return *this; - - using PH = BuiltinTypeMethodBuilder::PlaceHolder; - ASTContext &AST = SemaRef.getASTContext(); - QualType HandleType = getResourceHandleField()->getType(); - - return BuiltinTypeMethodBuilder(*this, "", AST.VoidTy, false, true) - .addParam("spaceNo", AST.UnsignedIntTy) - .addParam("range", AST.IntTy) - .addParam("index", AST.UnsignedIntTy) - .addParam("orderId", AST.UnsignedIntTy) - .callBuiltin("__builtin_hlsl_resource_handlefromimplicitbinding", - HandleType, PH::Handle, PH::_0, PH::_1, PH::_2, PH::_3) - .assign(PH::Handle, PH::LastStmt) - .finalize(); -} - BuiltinTypeDeclBuilder &BuiltinTypeDeclBuilder::addArraySubscriptOperators() { ASTContext &AST = Record->getASTContext(); DeclarationName Subscript = diff --git a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h index a52e2938104c7..db617dc53c899 100644 --- a/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h +++ b/clang/lib/Sema/HLSLBuiltinTypeDeclBuilder.h @@ -76,10 +76,9 @@ class BuiltinTypeDeclBuilder { AccessSpecifier Access = AccessSpecifier::AS_private); BuiltinTypeDeclBuilder &addArraySubscriptOperators(); - // Builtin types constructors + // Builtin types methods BuiltinTypeDeclBuilder &addDefaultHandleConstructor(); BuiltinTypeDeclBuilder &addHandleConstructorFromBinding(); - BuiltinTypeDeclBuilder &addHandleConstructorFromImplicitBinding(); // Builtin types methods BuiltinTypeDeclBuilder &addLoadMethods(); diff --git 
a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 38bde7c28e946..f09232a9db4da 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -132,8 +132,7 @@ static BuiltinTypeDeclBuilder setupBufferType(CXXRecordDecl *Decl, Sema &S, return BuiltinTypeDeclBuilder(S, Decl) .addHandleMember(RC, IsROV, RawBuffer) .addDefaultHandleConstructor() - .addHandleConstructorFromBinding() - .addHandleConstructorFromImplicitBinding(); + .addHandleConstructorFromBinding(); } // This function is responsible for constructing the constraint expression for diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 84b84de28c511..d7c62b44a5c50 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -14,7 +14,6 @@ #include "CheckExprLifetime.h" #include "clang/AST/APValue.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/ASTDiagnostic.h" #include "clang/AST/Attr.h" #include "clang/AST/AttrIterator.h" #include "clang/AST/CharUnits.h" @@ -11872,47 +11871,6 @@ static void DiagnoseIntInBoolContext(Sema &S, Expr *E) { } } -static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source, - const Type *Target, Expr *E, - QualType T, - SourceLocation CC) { - assert(Source->isUnicodeCharacterType() && Target->isUnicodeCharacterType() && - Source != Target); - Expr::EvalResult Result; - if (E->EvaluateAsInt(Result, S.getASTContext(), Expr::SE_AllowSideEffects, - S.isConstantEvaluatedContext())) { - llvm::APSInt Value(32); - Value = Result.Val.getInt(); - bool IsASCII = Value <= 0x7F; - bool IsBMP = Value <= 0xD7FF || (Value >= 0xE000 && Value <= 0xFFFF); - bool ConversionPreservesSemantics = - IsASCII || (!Source->isChar8Type() && !Target->isChar8Type() && IsBMP); - - if (!ConversionPreservesSemantics) { - auto IsSingleCodeUnitCP = [](const QualType &T, - const llvm::APSInt &Value) { - if (T->isChar8Type()) - return 
llvm::IsSingleCodeUnitUTF8Codepoint(Value.getExtValue()); - if (T->isChar16Type()) - return llvm::IsSingleCodeUnitUTF16Codepoint(Value.getExtValue()); - assert(T->isChar32Type()); - return llvm::IsSingleCodeUnitUTF32Codepoint(Value.getExtValue()); - }; - - S.Diag(CC, diag::warn_impcast_unicode_char_type_constant) - << E->getType() << T - << IsSingleCodeUnitCP(E->getType().getUnqualifiedType(), Value) - << FormatUTFCodeUnitAsCodepoint(Value.getExtValue(), E->getType()); - } - } else { - bool LosesPrecision = S.getASTContext().getIntWidth(E->getType()) > - S.getASTContext().getIntWidth(T); - DiagnoseImpCast(S, E, T, CC, - LosesPrecision ? diag::warn_impcast_unicode_precision - : diag::warn_impcast_unicode_char_type); - } -} - void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC, bool *ICContext, bool IsListInit) { if (E->isTypeDependent() || E->isValueDependent()) return; @@ -12250,11 +12208,6 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC, DiscardMisalignedMemberAddress(Target, E); - if (Source->isUnicodeCharacterType() && Target->isUnicodeCharacterType()) { - DiagnoseMixedUnicodeImplicitConversion(*this, Source, Target, E, T, CC); - return; - } - if (Target->isBooleanType()) DiagnoseIntInBoolContext(*this, E); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 1bd9056cad812..1ec613c6717e3 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -6274,7 +6274,7 @@ static void ReferenceDllExportedMembers(Sema &S, CXXRecordDecl *Class) { } } MarkingDllexportedContext(S, Class, ClassAttr->getLocation()); - if (S.Context.getTargetInfo().getTriple().isOSCygMing()) + if (S.Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()) S.MarkVTableUsed(Class->getLocation(), Class, true); for (Decl *Member : Class->decls()) { @@ -6576,7 +6576,7 @@ void Sema::checkClassLevelDLLAttribute(CXXRecordDecl *Class) { // declarations, except in MinGW mode. 
if (ClassExported && !ClassAttr->isInherited() && TSK == TSK_ExplicitInstantiationDeclaration && - !Context.getTargetInfo().getTriple().isOSCygMing()) { + !Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()) { Class->dropAttr(); return; } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 91e63c7cb8677..2c81f7c583eb6 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -15,7 +15,6 @@ #include "UsedDeclVisitor.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/ASTDiagnostic.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/CXXInheritance.h" @@ -1569,79 +1568,6 @@ void Sema::checkEnumArithmeticConversions(Expr *LHS, Expr *RHS, } } -static void CheckUnicodeArithmeticConversions(Sema &SemaRef, Expr *LHS, - Expr *RHS, SourceLocation Loc, - ArithConvKind ACK) { - QualType LHSType = LHS->getType().getUnqualifiedType(); - QualType RHSType = RHS->getType().getUnqualifiedType(); - - if (!SemaRef.getLangOpts().CPlusPlus || !LHSType->isUnicodeCharacterType() || - !RHSType->isUnicodeCharacterType()) - return; - - if (ACK == ArithConvKind::Comparison) { - if (SemaRef.getASTContext().hasSameType(LHSType, RHSType)) - return; - - auto IsSingleCodeUnitCP = [](const QualType &T, const llvm::APSInt &Value) { - if (T->isChar8Type()) - return llvm::IsSingleCodeUnitUTF8Codepoint(Value.getExtValue()); - if (T->isChar16Type()) - return llvm::IsSingleCodeUnitUTF16Codepoint(Value.getExtValue()); - assert(T->isChar32Type()); - return llvm::IsSingleCodeUnitUTF32Codepoint(Value.getExtValue()); - }; - - Expr::EvalResult LHSRes, RHSRes; - bool LHSSuccess = LHS->EvaluateAsInt(LHSRes, SemaRef.getASTContext(), - Expr::SE_AllowSideEffects, - SemaRef.isConstantEvaluatedContext()); - bool RHSuccess = RHS->EvaluateAsInt(RHSRes, SemaRef.getASTContext(), - Expr::SE_AllowSideEffects, - SemaRef.isConstantEvaluatedContext()); - - // Don't warn if the one 
known value is a representable - // in the type of both expressions. - if (LHSSuccess != RHSuccess) { - Expr::EvalResult &Res = LHSSuccess ? LHSRes : RHSRes; - if (IsSingleCodeUnitCP(LHSType, Res.Val.getInt()) && - IsSingleCodeUnitCP(RHSType, Res.Val.getInt())) - return; - } - - if (!LHSSuccess || !RHSuccess) { - SemaRef.Diag(Loc, diag::warn_comparison_unicode_mixed_types) - << LHS->getSourceRange() << RHS->getSourceRange() << LHSType - << RHSType; - return; - } - - llvm::APSInt LHSValue(32); - LHSValue = LHSRes.Val.getInt(); - llvm::APSInt RHSValue(32); - RHSValue = RHSRes.Val.getInt(); - - bool LHSSafe = IsSingleCodeUnitCP(LHSType, LHSValue); - bool RHSSafe = IsSingleCodeUnitCP(RHSType, RHSValue); - if (LHSSafe && RHSSafe) - return; - - SemaRef.Diag(Loc, diag::warn_comparison_unicode_mixed_types_constant) - << LHS->getSourceRange() << RHS->getSourceRange() << LHSType << RHSType - << FormatUTFCodeUnitAsCodepoint(LHSValue.getExtValue(), LHSType) - << FormatUTFCodeUnitAsCodepoint(RHSValue.getExtValue(), RHSType); - return; - } - - if (SemaRef.getASTContext().hasSameType(LHSType, RHSType)) - return; - - SemaRef.Diag(Loc, diag::warn_arith_conv_mixed_unicode_types) - << LHS->getSourceRange() << RHS->getSourceRange() << ACK << LHSType - << RHSType; - return; -} - /// UsualArithmeticConversions - Performs various conversions that are common to /// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this /// routine returns the first non-arithmetic type found. 
The client is @@ -1649,11 +1575,8 @@ static void CheckUnicodeArithmeticConversions(Sema &SemaRef, Expr *LHS, QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, ArithConvKind ACK) { - checkEnumArithmeticConversions(LHS.get(), RHS.get(), Loc, ACK); - CheckUnicodeArithmeticConversions(*this, LHS.get(), RHS.get(), Loc, ACK); - if (ACK != ArithConvKind::CompAssign) { LHS = UsualUnaryConversions(LHS.get()); if (LHS.isInvalid()) @@ -7318,20 +7241,10 @@ Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, ? VK_PRValue : VK_LValue; - // C99 6.5.2.5 - // "If the compound literal occurs outside the body of a function, the - // initializer list shall consist of constant expressions." if (IsFileScope) if (auto ILE = dyn_cast(LiteralExpr)) for (unsigned i = 0, j = ILE->getNumInits(); i != j; i++) { Expr *Init = ILE->getInit(i); - if (!Init->isTypeDependent() && !Init->isValueDependent() && - !Init->isConstantInitializer(Context, /*IsForRef=*/false)) { - Diag(Init->getExprLoc(), diag::err_init_element_not_constant) - << Init->getSourceBitField(); - return ExprError(); - } - ILE->setInit(i, ConstantExpr::Create(Context, Init)); } diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 39c162c3b835d..053414ff7a1a7 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -1385,7 +1385,7 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R, // lvalue. Because this is inherently unsafe as an atomic operation, the // warning defaults to an error. 
if (const auto *ATy = BaseType->getAs()) { - S.DiagRuntimeBehavior(OpLoc, BaseExpr.get(), + S.DiagRuntimeBehavior(OpLoc, nullptr, S.PDiag(diag::warn_atomic_member_access)); BaseType = ATy->getValueType().getUnqualifiedType(); BaseExpr = ImplicitCastExpr::Create( diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 16f3986179aea..744ec439b2393 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2454,20 +2454,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { TheCall->setType(ResourceTy); break; } - case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: { - ASTContext &AST = SemaRef.getASTContext(); - if (SemaRef.checkArgCount(TheCall, 5) || - CheckResourceHandle(&SemaRef, TheCall, 0) || - CheckArgTypeMatches(&SemaRef, TheCall->getArg(1), AST.UnsignedIntTy) || - CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), AST.IntTy) || - CheckArgTypeMatches(&SemaRef, TheCall->getArg(3), AST.UnsignedIntTy) || - CheckArgTypeMatches(&SemaRef, TheCall->getArg(4), AST.UnsignedIntTy)) - return true; - // use the type of the handle (arg0) as a return type - QualType ResourceTy = TheCall->getArg(0)->getType(); - TheCall->setType(ResourceTy); - break; - } case Builtin::BI__builtin_hlsl_and: case Builtin::BI__builtin_hlsl_or: { if (SemaRef.checkArgCount(TheCall, 2)) @@ -3299,10 +3285,8 @@ static bool initVarDeclWithCtor(Sema &S, VarDecl *VD, VD->getLocation(), SourceLocation(), SourceLocation()); InitializationSequence InitSeq(S, Entity, Kind, Args); - if (InitSeq.Failed()) - return false; - ExprResult Init = InitSeq.Perform(S, Entity, Kind, Args); + if (!Init.get()) return false; @@ -3312,42 +3296,27 @@ static bool initVarDeclWithCtor(Sema &S, VarDecl *VD, return true; } -bool SemaHLSL::initGlobalResourceDecl(VarDecl *VD) { - std::optional RegisterSlot; - uint32_t SpaceNo = 0; +static bool initGlobalResourceDecl(Sema &S, VarDecl *VD) { HLSLResourceBindingAttr *RBA = VD->getAttr(); - if 
(RBA) { - if (RBA->hasRegisterSlot()) - RegisterSlot = RBA->getSlotNumber(); - SpaceNo = RBA->getSpaceNumber(); - } + if (!RBA || !RBA->hasRegisterSlot()) + // FIXME: add support for implicit binding (llvm/llvm-project#110722) + return false; - ASTContext &AST = SemaRef.getASTContext(); + ASTContext &AST = S.getASTContext(); uint64_t UIntTySize = AST.getTypeSize(AST.UnsignedIntTy); uint64_t IntTySize = AST.getTypeSize(AST.IntTy); - IntegerLiteral *RangeSize = IntegerLiteral::Create( - AST, llvm::APInt(IntTySize, 1), AST.IntTy, SourceLocation()); - IntegerLiteral *Index = IntegerLiteral::Create( - AST, llvm::APInt(UIntTySize, 0), AST.UnsignedIntTy, SourceLocation()); - IntegerLiteral *Space = - IntegerLiteral::Create(AST, llvm::APInt(UIntTySize, SpaceNo), - AST.UnsignedIntTy, SourceLocation()); - - // resource with explicit binding - if (RegisterSlot.has_value()) { - IntegerLiteral *RegSlot = IntegerLiteral::Create( - AST, llvm::APInt(UIntTySize, RegisterSlot.value()), AST.UnsignedIntTy, - SourceLocation()); - Expr *Args[] = {RegSlot, Space, RangeSize, Index}; - return initVarDeclWithCtor(SemaRef, VD, Args); - } - - // resource with implicit binding - IntegerLiteral *OrderId = IntegerLiteral::Create( - AST, llvm::APInt(UIntTySize, getNextImplicitBindingOrderID()), - AST.UnsignedIntTy, SourceLocation()); - Expr *Args[] = {Space, RangeSize, Index, OrderId}; - return initVarDeclWithCtor(SemaRef, VD, Args); + Expr *Args[] = { + IntegerLiteral::Create(AST, llvm::APInt(UIntTySize, RBA->getSlotNumber()), + AST.UnsignedIntTy, SourceLocation()), + IntegerLiteral::Create(AST, + llvm::APInt(UIntTySize, RBA->getSpaceNumber()), + AST.UnsignedIntTy, SourceLocation()), + IntegerLiteral::Create(AST, llvm::APInt(IntTySize, 1), AST.IntTy, + SourceLocation()), + IntegerLiteral::Create(AST, llvm::APInt(UIntTySize, 0), AST.UnsignedIntTy, + SourceLocation())}; + + return initVarDeclWithCtor(S, VD, Args); } // Returns true if the initialization has been handled. 
@@ -3365,9 +3334,8 @@ bool SemaHLSL::ActOnUninitializedVarDecl(VarDecl *VD) { // FIXME: We currectly support only simple resources - no arrays of resources // or resources in user defined structs. // (llvm/llvm-project#133835, llvm/llvm-project#133837) - // Initialize resources at the global scope - if (VD->hasGlobalStorage() && VD->getType()->isHLSLResourceRecord()) - return initGlobalResourceDecl(VD); + if (VD->getType()->isHLSLResourceRecord()) + return initGlobalResourceDecl(SemaRef, VD); return false; } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index adce0a15bc320..dc12bacc0158b 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -1976,8 +1976,6 @@ void InitListChecker::CheckVectorType(const InitializedEntity &Entity, typeCode = "s"; else if (elementType->isUnsignedIntegerType()) typeCode = "u"; - else if (elementType->isMFloat8Type()) - typeCode = "mf"; else llvm_unreachable("Invalid element type!"); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 23304e12f8c31..e20a41c10ccaa 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -7846,8 +7846,6 @@ static void AddMethodTemplateCandidateImmediately( MethodTmpl, ExplicitTemplateArgs, Args, Specialization, Info, PartialOverloading, /*AggregateDeductionCandidate=*/false, /*PartialOrdering=*/false, ObjectType, ObjectClassification, - CandidateSet.getKind() == - clang::OverloadCandidateSet::CSK_AddressOfOverloadSet, [&](ArrayRef ParamTypes) { return S.CheckNonDependentConversions( MethodTmpl, ParamTypes, Args, CandidateSet, Conversions, @@ -7962,8 +7960,6 @@ static void AddTemplateOverloadCandidateImmediately( /*PartialOrdering=*/false, /*ObjectType=*/QualType(), /*ObjectClassification=*/Expr::Classification(), - CandidateSet.getKind() == - OverloadCandidateSet::CSK_AddressOfOverloadSet, [&](ArrayRef ParamTypes) { return S.CheckNonDependentConversions( FunctionTemplate, ParamTypes, Args, 
CandidateSet, Conversions, diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 14f9d1d03c5ed..e306da357ca86 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -9021,7 +9021,8 @@ Sema::CheckSpecializationInstantiationRedecl(SourceLocation NewLoc, // The declaration itself has not actually been instantiated, so it is // still okay to specialize it. StripImplicitInstantiation( - PrevDecl, Context.getTargetInfo().getTriple().isOSCygMing()); + PrevDecl, + Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()); return false; } // Fall through @@ -9891,7 +9892,7 @@ DeclResult Sema::ActOnExplicitInstantiation( : TSK_ExplicitInstantiationDeclaration; if (TSK == TSK_ExplicitInstantiationDeclaration && - !Context.getTargetInfo().getTriple().isOSCygMing()) { + !Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()) { // Check for dllexport class template instantiation declarations, // except for MinGW mode. for (const ParsedAttr &AL : Attr) { @@ -9956,7 +9957,7 @@ DeclResult Sema::ActOnExplicitInstantiation( = PrevDecl ? PrevDecl->getTemplateSpecializationKind() : TSK_Undeclared; if (TSK == TSK_ExplicitInstantiationDefinition && PrevDecl != nullptr && - Context.getTargetInfo().getTriple().isOSCygMing()) { + Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()) { // Check for dllexport class template instantiation definitions in MinGW // mode, if a previous declaration of the instantiation was seen. for (const ParsedAttr &AL : Attr) { @@ -10124,7 +10125,7 @@ DeclResult Sema::ActOnExplicitInstantiation( // In MinGW mode, export the template instantiation if the declaration // was marked dllexport. 
if (PrevDecl_TSK == TSK_ExplicitInstantiationDeclaration && - Context.getTargetInfo().getTriple().isOSCygMing() && + Context.getTargetInfo().getTriple().isWindowsGNUEnvironment() && PrevDecl->hasAttr()) { dllExportImportClassTemplateSpecialization(*this, Def); } diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 217d57d67f067..5dc06ebc2a235 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -4432,7 +4432,6 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( bool PartialOverloading, bool AggregateDeductionCandidate, bool PartialOrdering, QualType ObjectType, Expr::Classification ObjectClassification, - bool ForOverloadSetAddressResolution, llvm::function_ref)> CheckNonDependent) { if (FunctionTemplate->isInvalidDecl()) return TemplateDeductionResult::Invalid; @@ -4441,15 +4440,7 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( unsigned NumParams = Function->getNumParams(); bool HasExplicitObject = false; int ExplicitObjectOffset = 0; - - // [C++26] [over.call.func]p3 - // If the primary-expression is the address of an overload set, - // the argument list is the same as the expression-list in the call. - // Otherwise, the argument list is the expression-list in the call augmented - // by the addition of an implied object argument as in a qualified function - // call. 
- if (!ForOverloadSetAddressResolution && - Function->hasCXXExplicitFunctionObjectParameter()) { + if (Function->hasCXXExplicitFunctionObjectParameter()) { HasExplicitObject = true; ExplicitObjectOffset = 1; } diff --git a/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp index 0aaa32faefa39..d030e69a2a6e0 100644 --- a/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/AnalyzerStatsChecker.cpp @@ -45,7 +45,9 @@ void AnalyzerStatsChecker::checkEndAnalysis(ExplodedGraph &G, const SourceManager &SM = B.getSourceManager(); llvm::SmallPtrSet reachable; - const LocationContext *LC = Eng.getRootLocationContext(); + // Root node should have the location context of the top most function. + const ExplodedNode *GraphRoot = *G.roots_begin(); + const LocationContext *LC = GraphRoot->getLocation().getLocationContext(); const Decl *D = LC->getDecl(); diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index d5bc3ac2962d5..28b96f2717210 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -2660,7 +2660,8 @@ BugPathGetter::BugPathGetter(const ExplodedGraph *OriginalGraph, // Perform a forward BFS to find all the shortest paths. std::queue WS; - WS.push(TrimmedGraph->getRoot()); + assert(TrimmedGraph->num_roots() == 1); + WS.push(*TrimmedGraph->roots_begin()); unsigned Priority = 0; while (!WS.empty()) { @@ -2721,9 +2722,7 @@ BugPathInfo *BugPathGetter::getNextBugPath() { // Are we at the final node? 
if (OrigN->pred_empty()) { - assert(OrigN == TrimmedGraph->getRoot() && - "There should be only one root!"); - GNew->designateAsRoot(NewN); + GNew->addRoot(NewN); break; } diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp index 2e6631f2f620c..8ba304b3af0ca 100644 --- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp @@ -87,9 +87,8 @@ void CoreEngine::setBlockCounter(BlockCounter C) { /// ExecuteWorkList - Run the worklist algorithm for a maximum number of steps. bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned MaxSteps, ProgramStateRef InitState) { - if (G.empty()) { - assert(!G.getRoot() && "empty graph must not have a root node"); - // Initialize the analysis by constructing the root if there are no nodes. + if (G.num_roots() == 0) { // Initialize the analysis by constructing + // the root if none exists. const CFGBlock *Entry = &(L->getCFG()->getEntry()); @@ -118,7 +117,7 @@ bool CoreEngine::ExecuteWorkList(const LocationContext *L, unsigned MaxSteps, bool IsNew; ExplodedNode *Node = G.getNode(StartLoc, InitState, false, &IsNew); assert(IsNew); - G.designateAsRoot(Node); + G.addRoot(Node); NodeBuilderContext BuilderCtx(*this, StartLoc.getDst(), Node); ExplodedNodeSet DstBegin; @@ -549,11 +548,15 @@ void CoreEngine::HandleVirtualBaseBranch(const CFGBlock *B, void CoreEngine::generateNode(const ProgramPoint &Loc, ProgramStateRef State, ExplodedNode *Pred) { - assert(Pred); bool IsNew; ExplodedNode *Node = G.getNode(Loc, State, false, &IsNew); - Node->addPredecessor(Pred, G); // Link 'Node' with its predecessor. + if (Pred) + Node->addPredecessor(Pred, G); // Link 'Node' with its predecessor. + else { + assert(IsNew); + G.addRoot(Node); // 'Node' has no predecessor. Make it a root. + } // Only add 'Node' to the worklist if it was freshly generated. 
if (IsNew) WList->enqueue(Node); diff --git a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp index 098922d94061f..7b2cccce93cfe 100644 --- a/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExplodedGraph.cpp @@ -442,10 +442,6 @@ std::unique_ptr ExplodedGraph::trim(ArrayRef Sinks, InterExplodedGraphMap *ForwardMap, InterExplodedGraphMap *InverseMap) const { - // FIXME: The two-pass algorithm of this function (which was introduced in - // 2008) is terribly overcomplicated and should be replaced by a single - // (backward) pass. - if (Nodes.empty()) return nullptr; @@ -471,9 +467,8 @@ ExplodedGraph::trim(ArrayRef Sinks, if (!Pass1.insert(N).second) continue; - // If this is the root enqueue it to the second worklist. + // If this is a root enqueue it to the second worklist. if (N->Preds.empty()) { - assert(N == getRoot() && "Found non-root node with no predecessors!"); WL2.push_back(N); continue; } @@ -482,14 +477,12 @@ ExplodedGraph::trim(ArrayRef Sinks, WL1.append(N->Preds.begin(), N->Preds.end()); } - // We didn't hit the root? Return with a null pointer for the new graph. + // We didn't hit a root? Return with a null pointer for the new graph. if (WL2.empty()) return nullptr; - assert(WL2.size() == 1 && "There must be only one root!"); - // Create an empty graph. - std::unique_ptr G = std::make_unique(); + std::unique_ptr G = MakeEmptyGraph(); // ===- Pass 2 (forward DFS to construct the new graph) -=== while (!WL2.empty()) { @@ -510,11 +503,9 @@ ExplodedGraph::trim(ArrayRef Sinks, // Also record the reverse mapping from the new node to the old node. if (InverseMap) (*InverseMap)[NewN] = N; - // If this node is the root, designate it as such in the graph. - if (N->Preds.empty()) { - assert(N == getRoot()); - G->designateAsRoot(NewN); - } + // If this node is a root, designate it as such in the graph. 
+ if (N->Preds.empty()) + G->addRoot(NewN); // In the case that some of the intended predecessors of NewN have already // been created, we should hook them up as predecessors. diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index ebad83dad0c8f..f71441a3bb49b 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2529,7 +2529,7 @@ static const LocationContext *getInlinedLocationContext(ExplodedNode *Node, ExplodedGraph &G) { const LocationContext *CalleeLC = Node->getLocation().getLocationContext(); const LocationContext *RootLC = - G.getRoot()->getLocation().getLocationContext(); + (*G.roots_begin())->getLocation().getLocationContext(); if (CalleeLC->getStackFrame() == RootLC->getStackFrame()) return nullptr; diff --git a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp index a469df4ca7160..a6ade661d04a2 100644 --- a/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -80,49 +80,6 @@ void UnarySymExpr::dumpToStream(raw_ostream &os) const { os << ')'; } -const Stmt *SymbolConjured::getStmt() const { - // Sometimes the CFG element is invalid, avoid dereferencing it. 
- if (Elem.getParent() == nullptr || - Elem.getIndexInBlock() >= Elem.getParent()->size()) - return nullptr; - switch (Elem->getKind()) { - case CFGElement::Initializer: - if (const auto *Init = Elem->castAs().getInitializer()) { - return Init->getInit(); - } - return nullptr; - case CFGElement::ScopeBegin: - return Elem->castAs().getTriggerStmt(); - case CFGElement::ScopeEnd: - return Elem->castAs().getTriggerStmt(); - case CFGElement::NewAllocator: - return Elem->castAs().getAllocatorExpr(); - case CFGElement::LifetimeEnds: - return Elem->castAs().getTriggerStmt(); - case CFGElement::LoopExit: - return Elem->castAs().getLoopStmt(); - case CFGElement::Statement: - return Elem->castAs().getStmt(); - case CFGElement::Constructor: - return Elem->castAs().getStmt(); - case CFGElement::CXXRecordTypedCall: - return Elem->castAs().getStmt(); - case CFGElement::AutomaticObjectDtor: - return Elem->castAs().getTriggerStmt(); - case CFGElement::DeleteDtor: - return Elem->castAs().getDeleteExpr(); - case CFGElement::BaseDtor: - return nullptr; - case CFGElement::MemberDtor: - return nullptr; - case CFGElement::TemporaryDtor: - return Elem->castAs().getBindTemporaryExpr(); - case CFGElement::CleanupFunction: - return nullptr; - } - return nullptr; -} - void SymbolConjured::dumpToStream(raw_ostream &os) const { os << getKindStr() << getSymbolID() << '{' << T << ", LC" << LCtx->getID(); if (auto *S = getStmt()) diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp index 3c5e89d7d5a74..187f180afd3da 100644 --- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp +++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp @@ -503,16 +503,6 @@ namespace OversizedBitField { #endif } -namespace Discarded { - enum my_byte : unsigned char {}; - struct pad { - char a; - int b; - }; - constexpr int bad_my_byte = (__builtin_bit_cast(my_byte[8], pad{1, 2}), 0); // both-error {{must be initialized by a constant expression}} \ - // both-note 
{{indeterminate value can only initialize an object of type 'unsigned char' or 'std::byte';}} -} - typedef bool bool9 __attribute__((ext_vector_type(9))); // both-error@+2 {{constexpr variable 'bad_bool9_to_short' must be initialized by a constant expression}} // both-note@+1 {{bit_cast involving type 'bool __attribute__((ext_vector_type(9)))' (vector of 9 'bool' values) is not allowed in a constant expression; element size 1 * element count 9 is not a multiple of the byte size 8}} diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp index 0b2234ef83298..8fb19fcfcd3fe 100644 --- a/clang/test/AST/ByteCode/cxx20.cpp +++ b/clang/test/AST/ByteCode/cxx20.cpp @@ -997,21 +997,3 @@ namespace NastyChar { template constexpr auto to_nasty_char() { return t; } constexpr auto result = to_nasty_char<"12345">(); } - -namespace TempDtor { - struct A { - int n; - }; - constexpr A &&a_ref = A(); // both-note {{temporary created here}} - constexpr void destroy_extern_2() { // both-error {{never produces a constant expression}} - a_ref.~A(); // both-note {{destruction of temporary is not allowed in a constant expression outside the expression that created the temporary}} - } -} - -namespace OnePastEndDtor { - struct A {int n; }; - constexpr void destroy_past_end() { // both-error {{never produces a constant expression}} - A a; - (&a+1)->~A(); // both-note {{destruction of dereferenced one-past-the-end pointer}} - } -} diff --git a/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl b/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl index 8b9aa99a5314e..5fba939d29cfe 100644 --- a/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/ByteAddressBuffers-AST.hlsl @@ -78,27 +78,5 @@ RESOURCE Buffer; // CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr -// Constructor from implicit binding - -// CHECK: CXXConstructorDecl {{.*}} [[RESOURCE]] 'void (unsigned int, int, unsigned int, 
unsigned int)' inline -// CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' -// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} orderId 'unsigned int' -// CHECK-NEXT: CompoundStmt {{.*}} -// CHECK-NEXT: BinaryOperator {{.*}} '=' -// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle -// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue implicit this -// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t -// CHECK-NEXT: ImplicitCastExpr {{.*}} -// CHECK-NEXT: DeclRefExpr {{.*}} '' Function {{.*}} '__builtin_hlsl_resource_handlefromimplicitbinding' -// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle -// CHECK-NEXT: CXXThisExpr {{.*}} 'hlsl::[[RESOURCE]]' lvalue implicit this -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'orderId' 'unsigned int' -// CHECK-NEXT: AlwaysInlineAttr - // CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'const element_type &(unsigned int) const' // CHECK-NOSUBSCRIPT-NOT: CXXMethodDecl {{.*}} operator[] 'element_type &(unsigned int)' diff --git a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl index f8659313ff19c..63265a0003582 100644 --- a/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/StructuredBuffers-AST.hlsl @@ -125,28 +125,6 @@ RESOURCE Buffer; // CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr -// Constructor from implicit binding - -// CHECK: CXXConstructorDecl {{.*}} [[RESOURCE]] 'void (unsigned int, int, unsigned int, unsigned int)' inline -// CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} range 
'int' -// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} orderId 'unsigned int' -// CHECK-NEXT: CompoundStmt {{.*}} -// CHECK-NEXT: BinaryOperator {{.*}} '=' -// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle -// CHECK-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this -// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t -// CHECK-NEXT: ImplicitCastExpr {{.*}} -// CHECK-NEXT: DeclRefExpr {{.*}} '' Function {{.*}} '__builtin_hlsl_resource_handlefromimplicitbinding' -// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle -// CHECK-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'orderId' 'unsigned int' -// CHECK-NEXT: AlwaysInlineAttr - // Subscript operators // CHECK-SUBSCRIPT: CXXMethodDecl {{.*}} operator[] 'const hlsl_device element_type &(unsigned int) const' diff --git a/clang/test/AST/HLSL/TypedBuffers-AST.hlsl b/clang/test/AST/HLSL/TypedBuffers-AST.hlsl index dad1ef17a1f86..6074c1e8bcdd2 100644 --- a/clang/test/AST/HLSL/TypedBuffers-AST.hlsl +++ b/clang/test/AST/HLSL/TypedBuffers-AST.hlsl @@ -92,28 +92,6 @@ RESOURCE Buffer; // CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr -// Constructor from implicit binding - -// CHECK: CXXConstructorDecl {{.*}} [[RESOURCE]] 'void (unsigned int, int, unsigned int, unsigned int)' inline -// CHECK-NEXT: ParmVarDecl {{.*}} spaceNo 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} range 'int' -// CHECK-NEXT: ParmVarDecl {{.*}} index 'unsigned int' -// CHECK-NEXT: ParmVarDecl {{.*}} orderId 'unsigned int' -// CHECK-NEXT: CompoundStmt {{.*}} -// CHECK-NEXT: BinaryOperator {{.*}} '=' -// CHECK-NEXT: 
MemberExpr {{.*}} lvalue .__handle -// CHECK-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this -// CHECK-NEXT: CallExpr {{.*}} '__hlsl_resource_t -// CHECK-NEXT: ImplicitCastExpr {{.*}} -// CHECK-NEXT: DeclRefExpr {{.*}} '' Function {{.*}} '__builtin_hlsl_resource_handlefromimplicitbinding' -// CHECK-NEXT: MemberExpr {{.*}} lvalue .__handle -// CHECK-NEXT: CXXThisExpr {{.*}} '[[RESOURCE]]' lvalue implicit this -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'spaceNo' 'unsigned int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int' ParmVar {{.*}} 'range' 'int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'index' 'unsigned int' -// CHECK-NEXT: DeclRefExpr {{.*}} 'unsigned int' ParmVar {{.*}} 'orderId' 'unsigned int' -// CHECK-NEXT: AlwaysInlineAttr - // Subsctript operators // CHECK: CXXMethodDecl {{.*}} operator[] 'const hlsl_device element_type &(unsigned int) const' diff --git a/clang/test/Analysis/ftime-trace-no-init.cpp b/clang/test/Analysis/ftime-trace-no-init.cpp deleted file mode 100644 index 7fb289b19da78..0000000000000 --- a/clang/test/Analysis/ftime-trace-no-init.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,apiModeling %s -ftime-trace=%t.raw.json -verify -// expected-no-diagnostics - -// GitHub issue 139779 -struct {} a; // no-crash diff --git a/clang/test/Analysis/generate_analyzer_options_docs.test b/clang/test/Analysis/generate_analyzer_options_docs.test deleted file mode 100644 index 0c95346504ae3..0000000000000 --- a/clang/test/Analysis/generate_analyzer_options_docs.test +++ /dev/null @@ -1,14 +0,0 @@ -The documentation of analyzer options is generated by a script that parses -AnalyzerOptions.def. 
The following line validates that this script -"understands" everything in its input files: - -RUN: %python %src_dir/docs/tools/generate_analyzer_options_docs.py \ -RUN: --options-def %src_include_dir/clang/StaticAnalyzer/Core/AnalyzerOptions.def \ -RUN: --template %src_dir/docs/analyzer/user-docs/Options.rst.in \ -RUN: --out %t.rst - -Moreover, verify that the documentation (e.g. this fragment of the -documentation of the "mode" option) can be found in the output file: - -RUN: FileCheck --input-file=%t.rst %s -CHECK: Controls the high-level analyzer mode diff --git a/clang/test/CodeCompletion/source-loc-zero.cpp b/clang/test/CodeCompletion/source-loc-zero.cpp deleted file mode 100644 index a428c1534ffde..0000000000000 --- a/clang/test/CodeCompletion/source-loc-zero.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Regression test for #139375 -// Clang uses 1-based indexing for source locations given from the command-line. -// Verify that Clang rejects 0 as an invalid value for line or column number. 
- -// RUN: not %clang_cc1 -fsyntax-only -code-completion-at=%s:0:1 %s -o - 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-DIAG %s -// RUN: not %clang_cc1 -fsyntax-only -code-completion-at=%s:1:0 %s -o - 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-DIAG %s - -// CHECK-DIAG: error: invalid value '{{.*}}' in '-code-completion-at={{.*}}' -// CHECK-NEXT: hint: -code-completion-at=:: requires and to be integers greater than zero diff --git a/clang/test/CodeGen/AArch64/fp8-init-list.c b/clang/test/CodeGen/AArch64/fp8-init-list.c index 8b4b31a71c46a..872ee4f8a3d42 100644 --- a/clang/test/CodeGen/AArch64/fp8-init-list.c +++ b/clang/test/CodeGen/AArch64/fp8-init-list.c @@ -12,14 +12,14 @@ // CHECK-LABEL: define dso_local <8 x i8> @vector_init_test( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VECINIT14:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer -// CHECK-NEXT: ret <8 x i8> [[VECINIT14]] +// CHECK-NEXT: [[VECINIT7:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: ret <8 x i8> [[VECINIT7]] // // CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z16vector_init_testu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[VECINIT14:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer -// CHECK-CXX-NEXT: ret <8 x i8> [[VECINIT14]] +// CHECK-CXX-NEXT: [[VECINIT7:%.*]] = shufflevector <1 x i8> [[X]], <1 x i8> poison, <8 x i32> zeroinitializer +// CHECK-CXX-NEXT: ret <8 x i8> [[VECINIT7]] // mfloat8x8_t vector_init_test(__mfp8 x) { return (mfloat8x8_t) {x, x, x, x, x, x, x, x}; @@ -34,13 +34,15 @@ struct S s; // CHECK-LABEL: define dso_local void @f( // CHECK-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa 
[[TBAA2:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[X]], i64 0 +// CHECK-NEXT: store i8 [[TMP0]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: ret void // // CHECK-CXX-LABEL: define dso_local void @_Z1fu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: store <1 x i8> [[X]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] +// CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[X]], i64 0 +// CHECK-CXX-NEXT: store i8 [[TMP0]], ptr @s, align 1, !tbaa [[TBAA2:![0-9]+]] // CHECK-CXX-NEXT: ret void // void f(__mfp8 x) { diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c deleted file mode 100644 index fdc861836baf7..0000000000000 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c +++ /dev/null @@ -1,1158 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -#include - -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s - -// REQUIRES: aarch64-registered-target - -// CHECK-LABEL: define dso_local <8 x i8> @test_vset_lane_mf8( -// CHECK-SAME: <1 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[A]] to i8 -// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7 -// CHECK-NEXT: ret <8 x i8> [[VSET_LANE]] -// -mfloat8x8_t test_vset_lane_mf8(mfloat8_t a, mfloat8x8_t b) { - return vset_lane_mf8(a, b, 7); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vsetq_lane_mf8( -// CHECK-SAME: <1 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[A]] to i8 -// 
CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15 -// CHECK-NEXT: ret <16 x i8> [[VSET_LANE]] -// -mfloat8x16_t test_vsetq_lane_mf8(mfloat8_t a, mfloat8x16_t b) { - return vsetq_lane_mf8(a, b, 15); -} - - -// CHECK-LABEL: define dso_local <1 x i8> @test_vget_lane_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A]], i32 7 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: ret <1 x i8> [[TMP0]] -// -mfloat8_t test_vget_lane_mf8(mfloat8x8_t a) { - return vget_lane_mf8(a, 7); -} - -// CHECK-LABEL: define dso_local <1 x i8> @test_vdupb_lane_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[A]], i32 7 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: ret <1 x i8> [[TMP0]] -// -mfloat8_t test_vdupb_lane_mf8(mfloat8x8_t a) { - return vdupb_lane_mf8(a, 7); -} - -// CHECK-LABEL: define dso_local <1 x i8> @test_vgetq_lane_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <16 x i8> [[A]], i32 15 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: ret <1 x i8> [[TMP0]] -// -mfloat8_t test_vgetq_lane_mf8(mfloat8x16_t a) { - return vgetq_lane_mf8(a, 15); -} - -// CHECK-LABEL: define dso_local <1 x i8> @test_vdupb_laneq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <16 x i8> [[A]], i32 15 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: ret <1 x i8> [[TMP0]] -// -mfloat8_t test_vdupb_laneq_mf8(mfloat8x16_t a) { - return vdupb_laneq_mf8(a, 15); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vcreate_mf8( -// CHECK-SAME: i64 noundef [[A:%.*]]) 
#[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A]] to <8 x i8> -// CHECK-NEXT: ret <8 x i8> [[TMP0]] -// -mfloat8x8_t test_vcreate_mf8(uint64_t a) { - return vcreate_mf8(a); -} - - -// CHECK-LABEL: define dso_local <8 x i8> @test_vdup_n_mf8( -// CHECK-SAME: <1 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VEXT_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT_I:%.*]] = shufflevector <8 x i8> [[VEXT_I]], <8 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VEXT1_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> [[VEXT1_I]], <8 x i32> -// CHECK-NEXT: [[VEXT3_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <8 x i8> [[VECINIT2_I]], <8 x i8> [[VEXT3_I]], <8 x i32> -// CHECK-NEXT: [[VEXT5_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i8> [[VECINIT4_I]], <8 x i8> [[VEXT5_I]], <8 x i32> -// CHECK-NEXT: [[VEXT7_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i8> [[VECINIT6_I]], <8 x i8> [[VEXT7_I]], <8 x i32> -// CHECK-NEXT: [[VEXT9_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT10_I:%.*]] = shufflevector <8 x i8> [[VECINIT8_I]], <8 x i8> [[VEXT9_I]], <8 x i32> -// CHECK-NEXT: [[VEXT11_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT12_I:%.*]] = shufflevector <8 x i8> [[VECINIT10_I]], <8 x i8> [[VEXT11_I]], <8 x i32> -// CHECK-NEXT: [[VEXT13_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <8 x i8> [[VECINIT12_I]], <8 x i8> [[VEXT13_I]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[VECINIT14_I]] -// 
-mfloat8x8_t test_vdup_n_mf8(mfloat8_t a) { - return vdup_n_mf8(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vdupq_n_mf8( -// CHECK-SAME: <1 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VEXT_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT_I:%.*]] = shufflevector <16 x i8> [[VEXT_I]], <16 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VEXT1_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> [[VEXT1_I]], <16 x i32> -// CHECK-NEXT: [[VEXT3_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <16 x i8> [[VECINIT2_I]], <16 x i8> [[VEXT3_I]], <16 x i32> -// CHECK-NEXT: [[VEXT5_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <16 x i8> [[VECINIT4_I]], <16 x i8> [[VEXT5_I]], <16 x i32> -// CHECK-NEXT: [[VEXT7_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <16 x i8> [[VECINIT6_I]], <16 x i8> [[VEXT7_I]], <16 x i32> -// CHECK-NEXT: [[VEXT9_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT10_I:%.*]] = shufflevector <16 x i8> [[VECINIT8_I]], <16 x i8> [[VEXT9_I]], <16 x i32> -// CHECK-NEXT: [[VEXT11_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT12_I:%.*]] = shufflevector <16 x i8> [[VECINIT10_I]], <16 x i8> [[VEXT11_I]], <16 x i32> -// CHECK-NEXT: [[VEXT13_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[VECINIT12_I]], <16 x i8> [[VEXT13_I]], <16 x i32> -// CHECK-NEXT: [[VEXT15_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT16_I:%.*]] = shufflevector <16 x i8> 
[[VECINIT14_I]], <16 x i8> [[VEXT15_I]], <16 x i32> -// CHECK-NEXT: [[VEXT17_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT18_I:%.*]] = shufflevector <16 x i8> [[VECINIT16_I]], <16 x i8> [[VEXT17_I]], <16 x i32> -// CHECK-NEXT: [[VEXT19_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT20_I:%.*]] = shufflevector <16 x i8> [[VECINIT18_I]], <16 x i8> [[VEXT19_I]], <16 x i32> -// CHECK-NEXT: [[VEXT21_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT22_I:%.*]] = shufflevector <16 x i8> [[VECINIT20_I]], <16 x i8> [[VEXT21_I]], <16 x i32> -// CHECK-NEXT: [[VEXT23_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT24_I:%.*]] = shufflevector <16 x i8> [[VECINIT22_I]], <16 x i8> [[VEXT23_I]], <16 x i32> -// CHECK-NEXT: [[VEXT25_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT26_I:%.*]] = shufflevector <16 x i8> [[VECINIT24_I]], <16 x i8> [[VEXT25_I]], <16 x i32> -// CHECK-NEXT: [[VEXT27_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT28_I:%.*]] = shufflevector <16 x i8> [[VECINIT26_I]], <16 x i8> [[VEXT27_I]], <16 x i32> -// CHECK-NEXT: [[VEXT29_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT30_I:%.*]] = shufflevector <16 x i8> [[VECINIT28_I]], <16 x i8> [[VEXT29_I]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[VECINIT30_I]] -// -mfloat8x16_t test_vdupq_n_mf8(mfloat8_t a) { - return vdupq_n_mf8(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vmov_n_mf8( -// CHECK-SAME: <1 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VEXT_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT_I:%.*]] = shufflevector <8 x i8> [[VEXT_I]], <8 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VEXT1_I:%.*]] = 
shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> [[VEXT1_I]], <8 x i32> -// CHECK-NEXT: [[VEXT3_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <8 x i8> [[VECINIT2_I]], <8 x i8> [[VEXT3_I]], <8 x i32> -// CHECK-NEXT: [[VEXT5_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <8 x i8> [[VECINIT4_I]], <8 x i8> [[VEXT5_I]], <8 x i32> -// CHECK-NEXT: [[VEXT7_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i8> [[VECINIT6_I]], <8 x i8> [[VEXT7_I]], <8 x i32> -// CHECK-NEXT: [[VEXT9_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT10_I:%.*]] = shufflevector <8 x i8> [[VECINIT8_I]], <8 x i8> [[VEXT9_I]], <8 x i32> -// CHECK-NEXT: [[VEXT11_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT12_I:%.*]] = shufflevector <8 x i8> [[VECINIT10_I]], <8 x i8> [[VEXT11_I]], <8 x i32> -// CHECK-NEXT: [[VEXT13_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <8 x i32> -// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <8 x i8> [[VECINIT12_I]], <8 x i8> [[VEXT13_I]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[VECINIT14_I]] -// -mfloat8x8_t test_vmov_n_mf8(mfloat8_t a) { - return vmov_n_mf8(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vmovq_n_mf8( -// CHECK-SAME: <1 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VEXT_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT_I:%.*]] = shufflevector <16 x i8> [[VEXT_I]], <16 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VEXT1_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x 
i8> [[VEXT1_I]], <16 x i32> -// CHECK-NEXT: [[VEXT3_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT4_I:%.*]] = shufflevector <16 x i8> [[VECINIT2_I]], <16 x i8> [[VEXT3_I]], <16 x i32> -// CHECK-NEXT: [[VEXT5_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT6_I:%.*]] = shufflevector <16 x i8> [[VECINIT4_I]], <16 x i8> [[VEXT5_I]], <16 x i32> -// CHECK-NEXT: [[VEXT7_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT8_I:%.*]] = shufflevector <16 x i8> [[VECINIT6_I]], <16 x i8> [[VEXT7_I]], <16 x i32> -// CHECK-NEXT: [[VEXT9_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT10_I:%.*]] = shufflevector <16 x i8> [[VECINIT8_I]], <16 x i8> [[VEXT9_I]], <16 x i32> -// CHECK-NEXT: [[VEXT11_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT12_I:%.*]] = shufflevector <16 x i8> [[VECINIT10_I]], <16 x i8> [[VEXT11_I]], <16 x i32> -// CHECK-NEXT: [[VEXT13_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[VECINIT12_I]], <16 x i8> [[VEXT13_I]], <16 x i32> -// CHECK-NEXT: [[VEXT15_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT14_I]], <16 x i8> [[VEXT15_I]], <16 x i32> -// CHECK-NEXT: [[VEXT17_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT18_I:%.*]] = shufflevector <16 x i8> [[VECINIT16_I]], <16 x i8> [[VEXT17_I]], <16 x i32> -// CHECK-NEXT: [[VEXT19_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT20_I:%.*]] = shufflevector <16 x i8> [[VECINIT18_I]], <16 x i8> [[VEXT19_I]], <16 x i32> -// CHECK-NEXT: [[VEXT21_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: 
[[VECINIT22_I:%.*]] = shufflevector <16 x i8> [[VECINIT20_I]], <16 x i8> [[VEXT21_I]], <16 x i32> -// CHECK-NEXT: [[VEXT23_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT24_I:%.*]] = shufflevector <16 x i8> [[VECINIT22_I]], <16 x i8> [[VEXT23_I]], <16 x i32> -// CHECK-NEXT: [[VEXT25_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT26_I:%.*]] = shufflevector <16 x i8> [[VECINIT24_I]], <16 x i8> [[VEXT25_I]], <16 x i32> -// CHECK-NEXT: [[VEXT27_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT28_I:%.*]] = shufflevector <16 x i8> [[VECINIT26_I]], <16 x i8> [[VEXT27_I]], <16 x i32> -// CHECK-NEXT: [[VEXT29_I:%.*]] = shufflevector <1 x i8> [[A]], <1 x i8> poison, <16 x i32> -// CHECK-NEXT: [[VECINIT30_I:%.*]] = shufflevector <16 x i8> [[VECINIT28_I]], <16 x i8> [[VEXT29_I]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[VECINIT30_I]] -// -mfloat8x16_t test_vmovq_n_mf8(mfloat8_t a) { - return vmovq_n_mf8(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vcombine_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vcombine_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vcombine_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vget_high_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vget_high_mf8(mfloat8x16_t a) { - return vget_high_mf8(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vget_low_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = 
shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vget_low_mf8(mfloat8x16_t a) { - return vget_low_mf8(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtbl1_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL11_I]] -// -mfloat8x8_t test_vtbl1_mf8(mfloat8x8_t a, uint8x8_t b) { - return vtbl1_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtbl2_mf8( -// CHECK-SAME: [2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <8 x i8>] poison, <8 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <8 x i8>] [[DOTFCA_0_INSERT]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[DOTFCA_1_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[DOTFCA_1_INSERT]], 1 -// CHECK-NEXT: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_1_INSERT_FCA_0_EXTRACT]], <8 x i8> [[DOTFCA_1_INSERT_FCA_1_EXTRACT]], <16 x i32> -// CHECK-NEXT: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL13_I]] -// -mfloat8x8_t test_vtbl2_mf8(mfloat8x8x2_t a, uint8x8_t b) { - return vtbl2_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> 
@test_vtbl3_mf8( -// CHECK-SAME: [3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[A_COERCE]], 2 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <8 x i8>] poison, <8 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <8 x i8>] [[DOTFCA_0_INSERT]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <8 x i8>] [[DOTFCA_1_INSERT]], <8 x i8> [[A_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[DOTFCA_2_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[DOTFCA_2_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[DOTFCA_2_INSERT]], 2 -// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_2_INSERT_FCA_0_EXTRACT]], <8 x i8> [[DOTFCA_2_INSERT_FCA_1_EXTRACT]], <16 x i32> -// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_2_INSERT_FCA_2_EXTRACT]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL26_I]] -// -mfloat8x8_t test_vtbl3_mf8(mfloat8x8x3_t a, uint8x8_t b) { - return vtbl3_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtbl4_mf8( -// CHECK-SAME: [4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: 
[[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[A_COERCE]], 2 -// CHECK-NEXT: [[A_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[A_COERCE]], 3 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <8 x i8>] poison, <8 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <8 x i8>] [[DOTFCA_0_INSERT]], <8 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <8 x i8>] [[DOTFCA_1_INSERT]], <8 x i8> [[A_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <8 x i8>] [[DOTFCA_2_INSERT]], <8 x i8> [[A_COERCE_FCA_3_EXTRACT]], 3 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 3 -// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_3_INSERT_FCA_0_EXTRACT]], <8 x i8> [[DOTFCA_3_INSERT_FCA_1_EXTRACT]], <16 x i32> -// CHECK-NEXT: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_3_INSERT_FCA_2_EXTRACT]], <8 x i8> [[DOTFCA_3_INSERT_FCA_3_EXTRACT]], <16 x i32> -// CHECK-NEXT: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL28_I]] -// -mfloat8x8_t test_vtbl4_mf8(mfloat8x8x4_t a, uint8x8_t b) { - return vtbl4_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtbx1_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTBL1_I:%.*]] = 
shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK-NEXT: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8) -// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) -// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]] -// CHECK-NEXT: [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]] -// CHECK-NEXT: ret <8 x i8> [[VTBX_I]] -// -mfloat8x8_t test_vtbx1_mf8(mfloat8x8_t a, mfloat8x8_t b, uint8x8_t c) { - return vtbx1_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtbx2_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <8 x i8>] poison, <8 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <8 x i8>] [[DOTFCA_0_INSERT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[DOTFCA_1_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[DOTFCA_1_INSERT]], 1 -// CHECK-NEXT: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_1_INSERT_FCA_0_EXTRACT]], <8 x i8> [[DOTFCA_1_INSERT_FCA_1_EXTRACT]], <16 x i32> -// CHECK-NEXT: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX1_I]], <8 x i8> [[C]]) -// CHECK-NEXT: ret <8 x i8> [[VTBX13_I]] -// -mfloat8x8_t test_vtbx2_mf8(mfloat8x8_t a, mfloat8x8x2_t b, uint8x8_t c) { - return 
vtbx2_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtbx3_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <8 x i8>] poison, <8 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <8 x i8>] [[DOTFCA_0_INSERT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <8 x i8>] [[DOTFCA_1_INSERT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[DOTFCA_2_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[DOTFCA_2_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[DOTFCA_2_INSERT]], 2 -// CHECK-NEXT: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_2_INSERT_FCA_0_EXTRACT]], <8 x i8> [[DOTFCA_2_INSERT_FCA_1_EXTRACT]], <16 x i32> -// CHECK-NEXT: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_2_INSERT_FCA_2_EXTRACT]], <8 x i8> zeroinitializer, <16 x i32> -// CHECK-NEXT: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]]) -// CHECK-NEXT: [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24) -// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1) -// CHECK-NEXT: [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL26_I]] -// CHECK-NEXT: [[VTBX_I:%.*]] = or 
<8 x i8> [[TMP2]], [[TMP4]] -// CHECK-NEXT: ret <8 x i8> [[VTBX_I]] -// -mfloat8x8_t test_vtbx3_mf8(mfloat8x8_t a, mfloat8x8x3_t b, uint8x8_t c) { - return vtbx3_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtbx4_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2 -// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <8 x i8>] poison, <8 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <8 x i8>] [[DOTFCA_0_INSERT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <8 x i8>] [[DOTFCA_1_INSERT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <8 x i8>] [[DOTFCA_2_INSERT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], 3 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[DOTFCA_3_INSERT]], 3 -// CHECK-NEXT: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_3_INSERT_FCA_0_EXTRACT]], <8 x i8> [[DOTFCA_3_INSERT_FCA_1_EXTRACT]], <16 x i32> -// CHECK-NEXT: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[DOTFCA_3_INSERT_FCA_2_EXTRACT]], <8 x i8> [[DOTFCA_3_INSERT_FCA_3_EXTRACT]], <16 x i32> -// 
CHECK-NEXT: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> [[C]]) -// CHECK-NEXT: ret <8 x i8> [[VTBX28_I]] -// -mfloat8x8_t test_vtbx4_mf8(mfloat8x8_t a, mfloat8x8x4_t b, uint8x8_t c) { - return vtbx4_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vext_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[VEXT]] -// -mfloat8x8_t test_vext_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vext_mf8(a, b, 7); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vextq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[VEXT]] -// -mfloat8x16_t test_vextq_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vextq_mf8(a, b, 7); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vrev64_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vrev64_mf8(mfloat8x8_t a) { - return vrev64_mf8(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vrev64q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vrev64q_mf8(mfloat8x16_t a) { - return vrev64q_mf8(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vrev32_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> 
[[A]], <8 x i8> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vrev32_mf8(mfloat8x8_t a) { - return vrev32_mf8(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vrev32q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vrev32q_mf8(mfloat8x16_t a) { - return vrev32q_mf8(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vrev16_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vrev16_mf8(mfloat8x8_t a) { - return vrev16_mf8(a); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vrev16q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vrev16q_mf8(mfloat8x16_t a) { - return vrev16q_mf8(a); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vbsl_mf8( -// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> [[V2:%.*]], <8 x i8> [[V3:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VBSL_I:%.*]] = and <8 x i8> [[V1]], [[V2]] -// CHECK-NEXT: [[TMP0:%.*]] = xor <8 x i8> [[V1]], splat (i8 -1) -// CHECK-NEXT: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[V3]] -// CHECK-NEXT: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: ret <8 x i8> [[VBSL2_I]] -// -mfloat8x8_t test_vbsl_mf8(uint8x8_t v1, mfloat8x8_t v2, mfloat8x8_t v3) { - return vbsl_mf8(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vbslq_mf8( -// CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> [[V2:%.*]], <16 x i8> [[V3:%.*]]) 
#[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VBSL_I:%.*]] = and <16 x i8> [[V1]], [[V2]] -// CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i8> [[V1]], splat (i8 -1) -// CHECK-NEXT: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[V3]] -// CHECK-NEXT: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] -// CHECK-NEXT: ret <16 x i8> [[VBSL2_I]] -// -mfloat8x16_t test_vbslq_mf8(uint8x16_t v1, mfloat8x16_t v2, mfloat8x16_t v3) { - return vbslq_mf8(v1, v2, v3); -} - -// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vtrn_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x8x2_t test_vtrn_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vtrn_mf8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vtrnq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> 
[[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x16x2_t test_vtrnq_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vtrnq_mf8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vzip_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VZIP_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VZIP1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = 
insertvalue [[STRUCT_MFLOAT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x8x2_t test_vzip_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vzip_mf8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vzipq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VZIP_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VZIP1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x16x2_t test_vzipq_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vzipq_mf8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vuzp_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[VUZP1_I:%.*]] = 
shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VUZP_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VUZP1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x8x2_t test_vuzp_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vuzp_mf8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vuzpq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VUZP_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VUZP1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 
0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x16x2_t test_vuzpq_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vuzpq_mf8(a, b); -} - -// CHECK-LABEL: define dso_local void @test_vcopy_lane_mf8( -// CHECK-SAME: <8 x i8> [[ARG_I8X8:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[ARG_I8X8]], i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i8> [[TMP0]] to i8 -// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[ARG_I8X8]], i8 [[TMP1]], i32 0 -// CHECK-NEXT: ret void -// -void test_vcopy_lane_mf8(mfloat8x8_t arg_i8x8) { - vcopy_lane_mf8(arg_i8x8, 0, arg_i8x8, 0); -} - -// CHECK-LABEL: define dso_local void @test_vcopyq_lane_mf8( -// CHECK-SAME: <8 x i8> [[ARG_I8X8:%.*]], <16 x i8> [[ARG_I8X16:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <8 x i8> [[ARG_I8X8]], i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i8> [[TMP0]] to i8 -// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[ARG_I8X16]], i8 [[TMP1]], i32 0 -// CHECK-NEXT: ret void -// -void test_vcopyq_lane_mf8(mfloat8x8_t arg_i8x8, mfloat8x16_t arg_i8x16) { - vcopyq_lane_mf8(arg_i8x16, 0, arg_i8x8, 0); -} - -// CHECK-LABEL: define dso_local void @test_vcopy_laneq_mf8( -// CHECK-SAME: <8 x i8> [[ARG_I8X8:%.*]], <16 x i8> [[ARG_I8X16:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <16 x i8> [[ARG_I8X16]], i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i8> [[TMP0]] to i8 -// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x i8> [[ARG_I8X8]], i8 
[[TMP1]], i32 0 -// CHECK-NEXT: ret void -// -void test_vcopy_laneq_mf8(mfloat8x8_t arg_i8x8, mfloat8x16_t arg_i8x16) { - vcopy_laneq_mf8(arg_i8x8, 0, arg_i8x16, 0); -} - -// CHECK-LABEL: define dso_local void @test_vcopyq_laneq_mf8( -// CHECK-SAME: <16 x i8> [[ARG_I8X16:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <16 x i8> [[ARG_I8X16]], i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8 [[VGET_LANE]] to <1 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i8> [[TMP0]] to i8 -// CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <16 x i8> [[ARG_I8X16]], i8 [[TMP1]], i32 0 -// CHECK-NEXT: ret void -// -void test_vcopyq_laneq_mf8(mfloat8x16_t arg_i8x16) { - vcopyq_laneq_mf8(arg_i8x16, 0, arg_i8x16, 0); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vdup_lane_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[LANE]] -// -mfloat8x8_t test_vdup_lane_mf8(mfloat8x8_t a) { - return vdup_lane_mf8(a, 7); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vdupq_lane_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[A]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[LANE]] -// -mfloat8x16_t test_vdupq_lane_mf8(mfloat8x8_t a) { - return vdupq_lane_mf8(a, 7); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vdup_laneq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[LANE]] -// -mfloat8x8_t test_vdup_laneq_mf8(mfloat8x16_t a) { - return vdup_laneq_mf8(a, 7); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vdupq_laneq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[LANE]] -// -mfloat8x16_t test_vdupq_laneq_mf8(mfloat8x16_t a) { - return vdupq_laneq_mf8(a, 7); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn1_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vtrn1_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vtrn1_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn1q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vtrn1q_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vtrn1q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vzip1_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vzip1_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vzip1_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vzip1q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vzip1q_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vzip1q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp1_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = 
shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vuzp1_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vuzp1_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp1q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vuzp1q_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vuzp1q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vtrn2_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vtrn2_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vtrn2_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vtrn2q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vtrn2q_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vtrn2q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vzip2_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vzip2_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vzip2_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vzip2q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> 
[[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vzip2q_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vzip2q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vuzp2_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x i8> [[SHUFFLE_I]] -// -mfloat8x8_t test_vuzp2_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vuzp2_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vuzp2q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> -// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]] -// -mfloat8x16_t test_vuzp2q_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vuzp2q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbl1_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL1_I]] -// -mfloat8x8_t test_vqtbl1_mf8(mfloat8x16_t a, uint8x8_t b) { - return vqtbl1_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbl1q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) -// CHECK-NEXT: ret <16 x i8> [[VTBL1_I]] -// -mfloat8x16_t test_vqtbl1q_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vqtbl1q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbl2_mf8( -// CHECK-SAME: [2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: 
[[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <16 x i8>] poison, <16 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 1 -// CHECK-NEXT: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[DOTFCA_1_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_INSERT_FCA_1_EXTRACT]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL2_I]] -// -mfloat8x8_t test_vqtbl2_mf8(mfloat8x16x2_t a, uint8x8_t b) { - return vqtbl2_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbl2q_mf8( -// CHECK-SAME: [2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <16 x i8>] poison, <16 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 1 -// CHECK-NEXT: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[DOTFCA_1_INSERT_FCA_0_EXTRACT]], <16 x i8> 
[[DOTFCA_1_INSERT_FCA_1_EXTRACT]], <16 x i8> [[B]]) -// CHECK-NEXT: ret <16 x i8> [[VTBL2_I]] -// -mfloat8x16_t test_vqtbl2q_mf8(mfloat8x16x2_t a, mfloat8x16_t b) { - return vqtbl2q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbl3q_mf8( -// CHECK-SAME: [3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[A_COERCE]], 2 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <16 x i8>] poison, <16 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[A_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 2 -// CHECK-NEXT: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[DOTFCA_2_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_2_INSERT_FCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_INSERT_FCA_2_EXTRACT]], <16 x i8> [[B]]) -// CHECK-NEXT: ret <16 x i8> [[VTBL3_I]] -// -mfloat8x16_t test_vqtbl3q_mf8(mfloat8x16x3_t a, mfloat8x16_t b) { - return vqtbl3q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbl3_mf8( -// CHECK-SAME: [3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = 
extractvalue [3 x <16 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[A_COERCE]], 2 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <16 x i8>] poison, <16 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[A_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 2 -// CHECK-NEXT: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[DOTFCA_2_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_2_INSERT_FCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_INSERT_FCA_2_EXTRACT]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL3_I]] -// -mfloat8x8_t test_vqtbl3_mf8(mfloat8x16x3_t a, uint8x8_t b) { - return vqtbl3_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbl4_mf8( -// CHECK-SAME: [4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 2 -// CHECK-NEXT: [[A_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 3 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <16 x i8>] poison, <16 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// 
CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[A_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_2_INSERT]], <16 x i8> [[A_COERCE_FCA_3_EXTRACT]], 3 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 3 -// CHECK-NEXT: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[DOTFCA_3_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_1_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_3_EXTRACT]], <8 x i8> [[B]]) -// CHECK-NEXT: ret <8 x i8> [[VTBL4_I]] -// -mfloat8x8_t test_vqtbl4_mf8(mfloat8x16x4_t a, uint8x8_t b) { - return vqtbl4_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbl4q_mf8( -// CHECK-SAME: [4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[A_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 0 -// CHECK-NEXT: [[A_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 1 -// CHECK-NEXT: [[A_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 2 -// CHECK-NEXT: [[A_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[A_COERCE]], 3 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <16 x i8>] poison, <16 x i8> [[A_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <16 x i8>] 
[[DOTFCA_0_INSERT]], <16 x i8> [[A_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[A_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_2_INSERT]], <16 x i8> [[A_COERCE_FCA_3_EXTRACT]], 3 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 3 -// CHECK-NEXT: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[DOTFCA_3_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_1_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_3_EXTRACT]], <16 x i8> [[B]]) -// CHECK-NEXT: ret <16 x i8> [[VTBL4_I]] -// -mfloat8x16_t test_vqtbl4q_mf8(mfloat8x16x4_t a, mfloat8x16_t b) { - return vqtbl4q_mf8(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbx1_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) -// CHECK-NEXT: ret <8 x i8> [[VTBX1_I]] -// -mfloat8x8_t test_vqtbx1_mf8(mfloat8x8_t a, mfloat8x16_t b, uint8x8_t c) { - return vqtbx1_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbx1q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) -// CHECK-NEXT: ret <16 x i8> 
[[VTBX1_I]] -// -mfloat8x16_t test_vqtbx1q_mf8(mfloat8x16_t a, mfloat8x16_t b, uint8x16_t c) { - return vqtbx1q_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbx2_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <16 x i8>] poison, <16 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 1 -// CHECK-NEXT: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[DOTFCA_1_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_INSERT_FCA_1_EXTRACT]], <8 x i8> [[C]]) -// CHECK-NEXT: ret <8 x i8> [[VTBX2_I]] -// -mfloat8x8_t test_vqtbx2_mf8(mfloat8x8_t a, mfloat8x16x2_t b, uint8x8_t c) { - return vqtbx2_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbx2q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x <16 x i8>] poison, <16 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> 
[[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[DOTFCA_1_INSERT]], 1 -// CHECK-NEXT: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[DOTFCA_1_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_1_INSERT_FCA_1_EXTRACT]], <16 x i8> [[C]]) -// CHECK-NEXT: ret <16 x i8> [[VTBX2_I]] -// -mfloat8x16_t test_vqtbx2q_mf8(mfloat8x16_t a, mfloat8x16x2_t b, mfloat8x16_t c) { - return vqtbx2q_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbx3_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <16 x i8>] poison, <16 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 2 -// CHECK-NEXT: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[DOTFCA_2_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_2_INSERT_FCA_1_EXTRACT]], 
<16 x i8> [[DOTFCA_2_INSERT_FCA_2_EXTRACT]], <8 x i8> [[C]]) -// CHECK-NEXT: ret <8 x i8> [[VTBX3_I]] -// -mfloat8x8_t test_vqtbx3_mf8(mfloat8x8_t a, mfloat8x16x3_t b, uint8x8_t c) { - return vqtbx3_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbx3q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [3 x <16 x i8>] poison, <16 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [3 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[DOTFCA_2_INSERT]], 2 -// CHECK-NEXT: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[DOTFCA_2_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_2_INSERT_FCA_1_EXTRACT]], <16 x i8> [[DOTFCA_2_INSERT_FCA_2_EXTRACT]], <16 x i8> [[C]]) -// CHECK-NEXT: ret <16 x i8> [[VTBX3_I]] -// -mfloat8x16_t test_vqtbx3q_mf8(mfloat8x16_t a, mfloat8x16x3_t b, mfloat8x16_t c) { - return vqtbx3q_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <8 x i8> @test_vqtbx4_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef 
[[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2 -// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <16 x i8>] poison, <16 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_2_INSERT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], 3 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 3 -// CHECK-NEXT: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[DOTFCA_3_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_1_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_3_EXTRACT]], <8 x i8> [[C]]) -// CHECK-NEXT: ret <8 x i8> [[VTBX4_I]] -// -mfloat8x8_t test_vqtbx4_mf8(mfloat8x8_t a, mfloat8x16x4_t b, uint8x8_t c) { - return vqtbx4_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vqtbx4q_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] 
{ -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0 -// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1 -// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2 -// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [4 x <16 x i8>] poison, <16 x i8> [[B_COERCE_FCA_0_EXTRACT]], 0 -// CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_0_INSERT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], 1 -// CHECK-NEXT: [[DOTFCA_2_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_1_INSERT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT:%.*]] = insertvalue [4 x <16 x i8>] [[DOTFCA_2_INSERT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], 3 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 0 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 1 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 2 -// CHECK-NEXT: [[DOTFCA_3_INSERT_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[DOTFCA_3_INSERT]], 3 -// CHECK-NEXT: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[DOTFCA_3_INSERT_FCA_0_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_1_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_2_EXTRACT]], <16 x i8> [[DOTFCA_3_INSERT_FCA_3_EXTRACT]], <16 x i8> [[C]]) -// CHECK-NEXT: ret <16 x i8> [[VTBX4_I]] -// -mfloat8x16_t test_vqtbx4q_mf8(mfloat8x16_t a, mfloat8x16x4_t b, mfloat8x16_t c) { - return vqtbx4q_mf8(a, b, c); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_mf8( -// CHECK-SAME: <8 x i8> [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// 
CHECK-NEXT: [[VLUTI2_LANE:%.*]] = call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0) -// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] -// -mfloat8x16_t test_vluti2_lane_mf8(mfloat8x8_t vn, uint8x8_t vm) { - return vluti2_lane_mf8(vn, vm, 0); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_mf8( -// CHECK-SAME: <16 x i8> [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1) -// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] -// -mfloat8x16_t test_vluti2q_lane_mf8(mfloat8x16_t vn, uint8x8_t vm) { - return vluti2q_lane_mf8(vn, vm, 1); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_mf8( -// CHECK-SAME: <8 x i8> [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0) -// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] -// -mfloat8x16_t test_vluti2_laneq_mf8(mfloat8x8_t vn, uint8x16_t vm) { - return vluti2_laneq_mf8(vn, vm, 0); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_mf8( -// CHECK-SAME: <16 x i8> [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3) -// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] -// -mfloat8x16_t test_vluti2q_laneq_mf8(mfloat8x16_t vn, uint8x16_t vm) { - return vluti2q_laneq_mf8(vn, vm, 3); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_mf8( -// CHECK-SAME: <16 x i8> [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0) -// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]] -// -mfloat8x16_t test_vluti4q_lane_mf8(mfloat8x16_t vn, uint8x8_t vm) { - return vluti4q_lane_mf8(vn, vm, 0); -} - -// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_mf8( -// CHECK-SAME: <16 x i8> [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1) -// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]] -// -mfloat8x16_t test_vluti4q_laneq_mf8(mfloat8x16_t vn, uint8x16_t vm) { - return vluti4q_laneq_mf8(vn, vm, 1); -} diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c index 2f3994df03784..0b355db4b2073 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c @@ -49,8 +49,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local 
@_Z20test_svdot_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-LABEL: define dso_local @test_svdot_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm, // CHECK-CXX-LABEL: define dso_local @_Z20test_svdot_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fdot.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c index 425e6a57ffe3c..0daeeec9e7dd7 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fmla.c @@ -49,8 +49,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local @test_svmlalb_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -59,8 +59,8 @@ svfloat16_t test_svmlalb_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalb_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalb.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -91,8 +91,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-LABEL: define dso_local @test_svmlalt_n_f16_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -101,8 +101,8 @@ svfloat16_t test_svmlalt_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm // CHECK-CXX-LABEL: define dso_local @_Z22test_svmlalt_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalt.nxv8f16( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -169,8 +169,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // 
CHECK-LABEL: define dso_local @test_svmlallbb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -179,8 +179,8 @@ svfloat32_t test_svmlallbb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -211,8 +211,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlallbt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void 
@llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -221,8 +221,8 @@ svfloat32_t test_svmlallbt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlallbt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlallbt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -253,8 +253,8 @@ svfloat32_t test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltb_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -263,8 +263,8 @@ svfloat32_t 
test_svmlalltb_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltb_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltb.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -295,8 +295,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-LABEL: define dso_local @test_svmlalltt_n_f32_mf8( // CHECK-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) @@ -305,8 +305,8 @@ svfloat32_t test_svmlalltt_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t // CHECK-CXX-LABEL: define dso_local @_Z24test_svmlalltt_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m( // CHECK-CXX-SAME: [[ZDA:%.*]], [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: tail call void 
@llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0 +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i8 [[TMP0]], i64 0 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.fp8.fmlalltt.nxv4f32( [[ZDA]], [[ZN]], [[DOTSPLAT]]) diff --git a/clang/test/CodeGen/AArch64/struct-coerce-using-ptr.cpp b/clang/test/CodeGen/AArch64/struct-coerce-using-ptr.cpp deleted file mode 100644 index f7a44a5999887..0000000000000 --- a/clang/test/CodeGen/AArch64/struct-coerce-using-ptr.cpp +++ /dev/null @@ -1,622 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple aarch64-none-elf -fcxx-exceptions -fexceptions -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-A64 -// RUN: %clang_cc1 -triple arm64_32-apple-ios7.0 -fcxx-exceptions -fexceptions -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-A64_32 - -struct Sll { - long long x, y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z3Tll3Sll( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SLL:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SLL]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store i64 1, ptr [[X]], align 8 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z3Tll3Sll( -// CHECK-A64_32-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SLL:%.*]], align 8 -// CHECK-A64_32-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr 
inbounds nuw [[STRUCT_SLL]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store i64 1, ptr [[X]], align 8 -// CHECK-A64_32-NEXT: ret void -// -void Tll(Sll s) { s.x = 1; } - -struct Sp { - int *x; -}; -// CHECK-A64-LABEL: define dso_local void @_Z2Tp2Sp( -// CHECK-A64-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SP:%.*]], align 8 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[COERCE_VAL_IP:%.*]] = inttoptr i64 [[S_COERCE]] to ptr -// CHECK-A64-NEXT: store ptr [[COERCE_VAL_IP]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z2Tp2Sp( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SP:%.*]], align 4 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[S_COERCE]] to i32 -// CHECK-A64_32-NEXT: store i32 [[COERCE_VAL_II]], ptr [[COERCE_DIVE]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tp(Sp s) { *s.x = 1; } - -struct Spp { - int *x, *y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z3Tpp3Spp( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], 
ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z3Tpp3Spp( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpp(Spp s) { *s.x = 1; } - -struct Sppp { - int *x, *y, *z; -}; -// CHECK-A64-LABEL: define dso_local void @_Z4Tppp4Sppp( -// CHECK-A64-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-A64-NEXT: store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPPP:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z4Tppp4Sppp( -// CHECK-A64_32-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPPP:%.*]], align 4 -// CHECK-A64_32-NEXT: [[TMP_COERCE:%.*]] = alloca [2 x i64], align 8 -// CHECK-A64_32-NEXT: store [2 x i64] [[S_COERCE]], ptr [[TMP_COERCE]], align 8 -// CHECK-A64_32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[S]], ptr align 8 [[TMP_COERCE]], i32 12, i1 false) -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPPP]], 
ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tppp(Sppp s) { *s.x = 1; } - -struct Spi { - int *x, y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z3Tpi3Spi( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPI:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPI]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z3Tpi3Spi( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPI:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPI]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpi(Spi s) { *s.x = 1; } - -struct Srp { - int &x, *y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z3Trp3Srp( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SRP:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SRP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z3Trp3Srp( -// 
CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SRP:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SRP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Trp(Srp s) { s.x = 1; } - -struct __attribute__((__packed__)) Spp_packed { - int *x, *y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z10Tpp_packed10Spp_packed( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP_PACKED:%.*]], align 1 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 1 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_PACKED]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 1 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z10Tpp_packed10Spp_packed( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP_PACKED:%.*]], align 1 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 1 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_PACKED]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 1 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpp_packed(Spp_packed s) { *s.x = 1; } - -struct __attribute__((__packed__)) Spp_superpacked { - Spp_packed x; -}; -// CHECK-A64-LABEL: define dso_local void @_Z15Tpp_superpacked15Spp_superpacked( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// 
CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP_SUPERPACKED:%.*]], align 1 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_SUPERPACKED]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[COERCE_DIVE]], align 1 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_SUPERPACKED]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_PACKED:%.*]], ptr [[X]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X1]], align 1 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z15Tpp_superpacked15Spp_superpacked( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP_SUPERPACKED:%.*]], align 1 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_SUPERPACKED]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[COERCE_DIVE]], align 1 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_SUPERPACKED]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_PACKED:%.*]], ptr [[X]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X1]], align 1 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpp_superpacked(Spp_superpacked s) { *s.x.x = 1; } - -union Upp { - int *x; - long long *y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z11Tupp_packed3Upp( -// CHECK-A64-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[UNION_UPP:%.*]], align 8 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[UNION_UPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[COERCE_VAL_IP:%.*]] = 
inttoptr i64 [[S_COERCE]] to ptr -// CHECK-A64-NEXT: store ptr [[COERCE_VAL_IP]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z11Tupp_packed3Upp( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[UNION_UPP:%.*]], align 4 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[UNION_UPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[S_COERCE]] to i32 -// CHECK-A64_32-NEXT: store i32 [[COERCE_VAL_II]], ptr [[COERCE_DIVE]], align 4 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tupp_packed(Upp s) { *s.x = 1; } - -union USpp { - Spp s; - long long y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z12TUSpp_packed4USpp( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[UNION_USPP:%.*]], align 8 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[UNION_USPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z12TUSpp_packed4USpp( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[UNION_USPP:%.*]], align 8 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[UNION_USPP]], ptr [[S]], i32 
0, i32 0 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void TUSpp_packed(USpp s) { *s.s.x = 1; } - -struct Spf { - int *x; - int z[]; -}; -// CHECK-A64-LABEL: define dso_local void @_Z3Tpf3Spf( -// CHECK-A64-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPF:%.*]], align 8 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPF]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[COERCE_VAL_IP:%.*]] = inttoptr i64 [[S_COERCE]] to ptr -// CHECK-A64-NEXT: store ptr [[COERCE_VAL_IP]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPF]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z3Tpf3Spf( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPF:%.*]], align 4 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPF]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[S_COERCE]] to i32 -// CHECK-A64_32-NEXT: store i32 [[COERCE_VAL_II]], ptr [[COERCE_DIVE]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPF]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpf(Spf s) { *s.x = 1; } - -struct Sppf { - int *x, *y; - int z[]; -}; -// 
CHECK-A64-LABEL: define dso_local void @_Z4Tppf4Sppf( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPPF:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPPF]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z4Tppf4Sppf( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPPF:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPPF]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tppf(Sppf s) { *s.x = 1; } - -struct SSpSp { - struct Sp a, b; -}; -// CHECK-A64-LABEL: define dso_local void @_Z5TSpSp5SSpSp( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SSPSP:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPSP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SP:%.*]], ptr [[A]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z5TSpSp5SSpSp( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca 
[[STRUCT_SSPSP:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPSP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SP:%.*]], ptr [[A]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void TSpSp(SSpSp s) { *s.a.x = 1; } - -struct SSpp { - Spp a; -}; -// CHECK-A64-LABEL: define dso_local void @_Z4TSpp4SSpp( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SSPP:%.*]], align 8 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP:%.*]], ptr [[A]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z4TSpp4SSpp( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SSPP:%.*]], align 4 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[COERCE_DIVE]], align 4 -// CHECK-A64_32-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPP]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP:%.*]], ptr [[A]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// 
CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void TSpp(SSpp s) { *s.a.x = 1; } - -struct SSp : public Sp { - int* b; -}; -// CHECK-A64-LABEL: define dso_local void @_Z3TSp3SSp( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SSP:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SP:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z3TSp3SSp( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SSP:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SP:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void TSp(SSp s) { *s.x = 1; } - -struct Si { - int x; -}; -struct SSpi : public Si { - int* y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z4TSpi4SSpi( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SSPI:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SI:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store i32 1, ptr [[X]], align 8 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z4TSpi4SSpi( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: 
[[S:%.*]] = alloca [[STRUCT_SSPI:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SI:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store i32 1, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void TSpi(SSpi s) { s.x = 1; } - -struct Spa { - int* xs[1]; -}; -// CHECK-A64-LABEL: define dso_local void @_Z3Tpa3Spa( -// CHECK-A64-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPA:%.*]], align 8 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store i64 [[S_COERCE]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64-NEXT: [[XS:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x ptr], ptr [[XS]], i64 0, i64 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z3Tpa3Spa( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPA:%.*]], align 4 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[S_COERCE]] to i32 -// CHECK-A64_32-NEXT: store i32 [[COERCE_VAL_II]], ptr [[COERCE_DIVE]], align 4 -// CHECK-A64_32-NEXT: [[XS:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x ptr], ptr [[XS]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpa(Spa s) 
{ *s.xs[0] = 1; } - -struct Spa2 { - int* xs[2]; -}; -// CHECK-A64-LABEL: define dso_local void @_Z4Tpa24Spa2( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPA2:%.*]], align 8 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA2]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[COERCE_DIVE]], align 8 -// CHECK-A64-NEXT: [[XS:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA2]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr], ptr [[XS]], i64 0, i64 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z4Tpa24Spa2( -// CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPA2:%.*]], align 4 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA2]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[COERCE_DIVE]], align 4 -// CHECK-A64_32-NEXT: [[XS:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA2]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr], ptr [[XS]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpa2(Spa2 s) { *s.xs[0] = 1; } - -struct Spa3 { - int* xs[3]; -}; -// CHECK-A64-LABEL: define dso_local void @_Z4Tpa34Spa3( -// CHECK-A64-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-A64-NEXT: store ptr [[S]], ptr [[S_INDIRECT_ADDR]], align 8 -// CHECK-A64-NEXT: [[XS:%.*]] = 
getelementptr inbounds nuw [[STRUCT_SPA3:%.*]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x ptr], ptr [[XS]], i64 0, i64 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z4Tpa34Spa3( -// CHECK-A64_32-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPA3:%.*]], align 4 -// CHECK-A64_32-NEXT: [[TMP_COERCE:%.*]] = alloca [2 x i64], align 8 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA3]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store [2 x i64] [[S_COERCE]], ptr [[TMP_COERCE]], align 8 -// CHECK-A64_32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[COERCE_DIVE]], ptr align 8 [[TMP_COERCE]], i32 12, i1 false) -// CHECK-A64_32-NEXT: [[XS:%.*]] = getelementptr inbounds nuw [[STRUCT_SPA3]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x ptr], ptr [[XS]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpa3(Spa3 s) { *s.xs[0] = 1; } - - -struct __attribute__((aligned(16))) Spp_align16 { - int *x, *y; -}; -// CHECK-A64-LABEL: define dso_local void @_Z11Tpp_align1611Spp_align16( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP_ALIGN16:%.*]], align 16 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 16 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_ALIGN16]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 16 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret 
void -// -// CHECK-A64_32-LABEL: define void @_Z11Tpp_align1611Spp_align16( -// CHECK-A64_32-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPP_ALIGN16:%.*]], align 16 -// CHECK-A64_32-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 16 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_ALIGN16]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 16 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void Tpp_align16(Spp_align16 s) { *s.x = 1; } - -struct SSpp_align16 { - Spp_align16 a; -}; -// CHECK-A64-LABEL: define dso_local void @_Z12TSpp_align1612SSpp_align16( -// CHECK-A64-SAME: i128 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SSPP_ALIGN16:%.*]], align 16 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPP_ALIGN16]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store i128 [[S_COERCE]], ptr [[COERCE_DIVE]], align 16 -// CHECK-A64-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPP_ALIGN16]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_ALIGN16:%.*]], ptr [[A]], i32 0, i32 0 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 16 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z12TSpp_align1612SSpp_align16( -// CHECK-A64_32-SAME: i128 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SSPP_ALIGN16:%.*]], align 16 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SSPP_ALIGN16]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store i128 [[S_COERCE]], ptr [[COERCE_DIVE]], align 16 -// CHECK-A64_32-NEXT: [[A:%.*]] = getelementptr 
inbounds nuw [[STRUCT_SSPP_ALIGN16]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPP_ALIGN16:%.*]], ptr [[A]], i32 0, i32 0 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 16 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void TSpp_align16(SSpp_align16 s) { *s.a.x = 1; } - - -struct Sempty { -}; -// CHECK-A64-LABEL: define dso_local void @_Z6Tempty6Sempty( -// CHECK-A64-SAME: i8 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SEMPTY:%.*]], align 1 -// CHECK-A64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SEMPTY]], ptr [[S]], i32 0, i32 0 -// CHECK-A64-NEXT: store i8 [[S_COERCE]], ptr [[COERCE_DIVE]], align 1 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z6Tempty6Sempty( -// CHECK-A64_32-SAME: i8 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SEMPTY:%.*]], align 1 -// CHECK-A64_32-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SEMPTY]], ptr [[S]], i32 0, i32 0 -// CHECK-A64_32-NEXT: store i8 [[S_COERCE]], ptr [[COERCE_DIVE]], align 1 -// CHECK-A64_32-NEXT: ret void -// -void Tempty(Sempty s) { } - - -struct SpSempty { - Sempty y; - int *x; -}; -// CHECK-A64-LABEL: define dso_local void @_Z8TpSempty8SpSempty( -// CHECK-A64-SAME: [2 x i64] [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64-NEXT: [[ENTRY:.*:]] -// CHECK-A64-NEXT: [[S:%.*]] = alloca [[STRUCT_SPSEMPTY:%.*]], align 8 -// CHECK-A64-NEXT: store [2 x i64] [[S_COERCE]], ptr [[S]], align 8 -// CHECK-A64-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPSEMPTY]], ptr [[S]], i32 0, i32 1 -// CHECK-A64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 -// CHECK-A64-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64-NEXT: ret void -// -// CHECK-A64_32-LABEL: define void @_Z8TpSempty8SpSempty( -// 
CHECK-A64_32-SAME: i64 [[S_COERCE:%.*]]) #[[ATTR0]] { -// CHECK-A64_32-NEXT: [[ENTRY:.*:]] -// CHECK-A64_32-NEXT: [[S:%.*]] = alloca [[STRUCT_SPSEMPTY:%.*]], align 4 -// CHECK-A64_32-NEXT: store i64 [[S_COERCE]], ptr [[S]], align 4 -// CHECK-A64_32-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SPSEMPTY]], ptr [[S]], i32 0, i32 1 -// CHECK-A64_32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 4 -// CHECK-A64_32-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-A64_32-NEXT: ret void -// -void TpSempty(SpSempty s) { *s.x = 1; } diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c index 3ab065d34bcfb..e2f02dc64f766 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-call.c @@ -15,12 +15,24 @@ typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_ // CHECK-64-LABEL: @call_bool32_ff( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP2:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[TMP0:%.*]], [[TMP1:%.*]], i64 2) +// CHECK-64-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2) +// CHECK-64-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-64-NEXT: ret [[TMP2]] // // CHECK-128-LABEL: @call_bool32_ff( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[TMP0:%.*]], [[TMP1:%.*]], i64 4) +// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1 +// 
CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] // fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { @@ -29,12 +41,24 @@ fixed_bool32_t call_bool32_ff(fixed_bool32_t op1, fixed_bool32_t op2) { // CHECK-64-LABEL: @call_bool64_ff( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP2:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[TMP0:%.*]], [[TMP1:%.*]], i64 1) +// CHECK-64-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 1) +// CHECK-64-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA11:![0-9]+]] +// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]] +// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-64-NEXT: ret [[TMP2]] // // CHECK-128-LABEL: @call_bool64_ff( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[TMP0:%.*]], [[TMP1:%.*]], i64 2) +// CHECK-128-NEXT: [[SAVED_VALUE4:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 2) +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE4]], align 1, !tbaa 
[[TBAA11:![0-9]+]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE4]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 // CHECK-128-NEXT: ret [[TMP2]] // fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) { @@ -47,13 +71,25 @@ fixed_bool64_t call_bool64_ff(fixed_bool64_t op1, fixed_bool64_t op2) { // CHECK-64-LABEL: @call_bool32_fs( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[TMP0:%.*]], [[OP2:%.*]], i64 2) -// CHECK-64-NEXT: ret [[TMP1]] +// CHECK-64-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 2) +// CHECK-64-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]] +// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]] +// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: ret [[TMP2]] // // CHECK-128-LABEL: @call_bool32_fs( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP1:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[TMP0:%.*]], [[OP2:%.*]], i64 4) -// CHECK-128-NEXT: ret [[TMP1]] +// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 4) +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP2:%.*]] = load , 
ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: ret [[TMP2]] // fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) { return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32); @@ -61,13 +97,25 @@ fixed_bool32_t call_bool32_fs(fixed_bool32_t op1, vbool32_t op2) { // CHECK-64-LABEL: @call_bool64_fs( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[TMP0:%.*]], [[OP2:%.*]], i64 1) -// CHECK-64-NEXT: ret [[TMP1]] +// CHECK-64-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 1) +// CHECK-64-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]] +// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]] +// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: ret [[TMP2]] // // CHECK-128-LABEL: @call_bool64_fs( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP1:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[TMP0:%.*]], [[OP2:%.*]], i64 2) -// CHECK-128-NEXT: ret [[TMP1]] +// CHECK-128-NEXT: [[SAVED_VALUE2:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 2) +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA11]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE2]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: ret [[TMP2]] // fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) { return __riscv_vmand(op1, op2, 
__riscv_v_fixed_vlen / 64); @@ -79,13 +127,25 @@ fixed_bool64_t call_bool64_fs(fixed_bool64_t op1, vbool64_t op2) { // CHECK-64-LABEL: @call_bool32_ss( // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 // CHECK-64-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 2) -// CHECK-64-NEXT: ret [[TMP0]] +// CHECK-64-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: ret [[TMP2]] // // CHECK-128-LABEL: @call_bool32_ss( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 4) -// CHECK-128-NEXT: ret [[TMP0]] +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: ret [[TMP2]] // fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) { return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 32); @@ -93,13 +153,25 @@ fixed_bool32_t call_bool32_ss(vbool32_t op1, vbool32_t op2) { // CHECK-64-LABEL: @call_bool64_ss( // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 // CHECK-64-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 1) -// CHECK-64-NEXT: ret [[TMP0]] 
+// CHECK-64-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]] +// CHECK-64-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-64-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: ret [[TMP2]] // // CHECK-128-LABEL: @call_bool64_ss( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 // CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vmand.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 2) -// CHECK-128-NEXT: ret [[TMP0]] +// CHECK-128-NEXT: store [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: ret [[TMP2]] // fixed_bool64_t call_bool64_ss(vbool64_t op1, vbool64_t op2) { return __riscv_vmand(op1, op2, __riscv_v_fixed_vlen / 64); diff --git a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c index 8407c065adb21..f0fa7e8d07b4d 100644 --- a/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c +++ b/clang/test/CodeGen/RISCV/attr-riscv-rvv-vector-bits-less-8-cast.c @@ -29,22 +29,46 @@ fixed_bool8_t from_vbool8_t(vbool8_t type) { // CHECK-64-LABEL: @from_vbool16_t( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: ret [[TYPE:%.*]] +// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-64-NEXT: 
store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: ret [[TMP1]] // // CHECK-128-LABEL: @from_vbool16_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: ret [[TYPE:%.*]] +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10:![0-9]+]] +// CHECK-128-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: ret [[TMP1]] // fixed_bool16_t from_vbool16_t(vbool16_t type) { return type; } // CHECK-64-LABEL: @from_vbool32_t( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: ret [[TYPE:%.*]] +// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11:![0-9]+]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-64-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: ret [[TMP1]] // // CHECK-128-LABEL: @from_vbool32_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: ret [[TYPE:%.*]] +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA11:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: 
ret [[TMP1]] // fixed_bool32_t from_vbool32_t(vbool32_t type) { return type; @@ -52,11 +76,11 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) { // CHECK-64-LABEL: @to_vbool32_t( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: ret [[TMP0:%.*]] +// CHECK-64-NEXT: ret [[TYPE_COERCE:%.*]] // // CHECK-128-LABEL: @to_vbool32_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: ret [[TMP0:%.*]] +// CHECK-128-NEXT: ret [[TYPE_COERCE:%.*]] // vbool32_t to_vbool32_t(fixed_bool32_t type) { return type; @@ -64,11 +88,23 @@ vbool32_t to_vbool32_t(fixed_bool32_t type) { // CHECK-64-LABEL: @from_vbool64_t( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: ret [[TYPE:%.*]] +// CHECK-64-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-64-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA13:![0-9]+]] +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-64-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-64-NEXT: ret [[TMP1]] // // CHECK-128-LABEL: @from_vbool64_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: ret [[TYPE:%.*]] +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA13:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA10]] +// CHECK-128-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-128-NEXT: ret [[TMP1]] // fixed_bool64_t from_vbool64_t(vbool64_t type) { return type; @@ -76,11 +112,11 @@ fixed_bool64_t from_vbool64_t(vbool64_t type) { // CHECK-64-LABEL: @to_vbool64_t( // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: ret [[TMP0:%.*]] +// CHECK-64-NEXT: ret 
[[TYPE_COERCE:%.*]] // // CHECK-128-LABEL: @to_vbool64_t( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: ret [[TMP0:%.*]] +// CHECK-128-NEXT: ret [[TYPE_COERCE:%.*]] // vbool64_t to_vbool64_t(fixed_bool64_t type) { return type; diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c index 45a099dc9c678..058ec49b77881 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-bitcast-less-8.c @@ -55,12 +55,12 @@ DEFINE_STRUCT(bool64) // CHECK-128-LABEL: @read_bool32( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 // CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[TBAA6:![0-9]+]] -// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) -// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to -// CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP1]], i64 0) -// CHECK-128-NEXT: ret [[TMP2]] +// CHECK-128-NEXT: store <1 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: ret [[TMP1]] // vbool32_t read_bool32(struct struct_bool32 *s) { return s->y[0]; @@ -68,11 +68,11 @@ vbool32_t read_bool32(struct struct_bool32 *s) { // CHECK-128-LABEL: @write_bool32( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv2i1( zeroinitializer, [[X:%.*]], i64 0) -// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to -// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: store [[X:%.*]], ptr 
[[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 -// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: store <1 x i8> [[TMP0]], ptr [[Y]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: ret void // void write_bool32(struct struct_bool32 *s, vbool32_t x) { @@ -81,12 +81,12 @@ void write_bool32(struct struct_bool32 *s, vbool32_t x) { // CHECK-128-LABEL: @read_bool64( // CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 // CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[Y]], align 1, !tbaa [[TBAA6]] -// CHECK-128-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) -// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to -// CHECK-128-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv1i1.nxv8i1( [[TMP1]], i64 0) -// CHECK-128-NEXT: ret [[TMP2]] +// CHECK-128-NEXT: store <1 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: ret [[TMP1]] // vbool64_t read_bool64(struct struct_bool64 *s) { return s->y[0]; @@ -94,11 +94,11 @@ vbool64_t read_bool64(struct struct_bool64 *s) { // CHECK-128-LABEL: @write_bool64( // CHECK-128-NEXT: entry: -// CHECK-128-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv1i1( zeroinitializer, [[X:%.*]], i64 0) -// CHECK-128-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to -// CHECK-128-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) +// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-128-NEXT: store [[X:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa 
[[TBAA11:![0-9]+]] +// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[S:%.*]], i64 1 -// CHECK-128-NEXT: store <1 x i8> [[CAST_FIXED]], ptr [[Y]], align 1, !tbaa [[TBAA6]] +// CHECK-128-NEXT: store <1 x i8> [[TMP0]], ptr [[Y]], align 1, !tbaa [[TBAA6]] // CHECK-128-NEXT: ret void // void write_bool64(struct struct_bool64 *s, vbool64_t x) { diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c index 0a50e41dda7e1..7992951346d54 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-cast.c @@ -97,7 +97,13 @@ vbool4_t to_vbool4_t(fixed_bool4_t type) { // CHECK-LABEL: @from_vbool32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TYPE:%.*]] +// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 +// CHECK-NEXT: store [[TYPE:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: store <1 x i8> [[TMP0]], ptr [[RETVAL_COERCE]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-NEXT: ret [[TMP1]] // fixed_bool32_t from_vbool32_t(vbool32_t type) { return type; @@ -105,7 +111,7 @@ fixed_bool32_t from_vbool32_t(vbool32_t type) { // CHECK-LABEL: @to_vbool32_t( // CHECK-NEXT: entry: -// CHECK-NEXT: ret [[TMP0:%.*]] +// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] // vbool32_t to_vbool32_t(fixed_bool32_t type) { return type; @@ -113,7 +119,7 @@ vbool32_t to_vbool32_t(fixed_bool32_t type) { // CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA6:![0-9]+]] +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA8]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -124,7 +130,7 @@ vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { // CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[CAST_FIXED:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE:%.*]], i64 0) -// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { @@ -133,7 +139,7 @@ gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { // CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA8]] // CHECK-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TYPE]], i64 0) // CHECK-NEXT: ret [[CAST_SCALABLE]] // @@ -144,7 +150,7 @@ fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { // CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t( // CHECK-NEXT: entry: // CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0) -// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA6]] +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA8]] // CHECK-NEXT: ret void // gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) { diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c index f01e6caeefd43..d81855aea2e5e 100644 --- 
a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-codegen.c @@ -113,25 +113,25 @@ fixed_int16m4_t test_bool4(vbool4_t m, vint16m4_t vec) { // CHECK-NEXT: [[M_ADDR:%.*]] = alloca , align 1 // CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca , align 4 // CHECK-NEXT: [[MASK:%.*]] = alloca , align 1 +// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 // CHECK-NEXT: store [[M:%.*]], ptr [[M_ADDR]], align 1 // CHECK-NEXT: store [[VEC:%.*]], ptr [[VEC_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[M_ADDR]], align 1 // CHECK-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr @global_bool32, align 1 -// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP1]], i64 0) -// CHECK-NEXT: [[TMP2:%.*]] = bitcast [[CAST_SCALABLE]] to -// CHECK-NEXT: [[TMP3:%.*]] = call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP4:%.*]] = call @llvm.riscv.vmand.nxv2i1.i64( [[TMP0]], [[TMP3]], i64 8) -// CHECK-NEXT: store [[TMP4]], ptr [[MASK]], align 1 -// CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[MASK]], align 1 -// CHECK-NEXT: [[TMP6:%.*]] = load , ptr [[VEC_ADDR]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i32>, ptr @global_vec, align 8 -// CHECK-NEXT: [[CAST_SCALABLE1:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TMP7]], i64 0) -// CHECK-NEXT: [[TMP8:%.*]] = call @llvm.riscv.vadd.mask.nxv2i32.nxv2i32.i64( poison, [[TMP6]], [[CAST_SCALABLE1]], [[TMP5]], i64 8, i64 3) -// CHECK-NEXT: [[CAST_FIXED:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TMP8]], i64 0) +// CHECK-NEXT: store <1 x i8> [[TMP1]], ptr [[SAVED_VALUE]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[SAVED_VALUE]], align 1 +// CHECK-NEXT: [[TMP3:%.*]] = call @llvm.riscv.vmand.nxv2i1.i64( [[TMP0]], [[TMP2]], i64 8) +// CHECK-NEXT: store [[TMP3]], ptr [[MASK]], align 1 +// CHECK-NEXT: [[TMP4:%.*]] = load , ptr [[MASK]], align 1 +// CHECK-NEXT: [[TMP5:%.*]] = 
load , ptr [[VEC_ADDR]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr @global_vec, align 8 +// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TMP6]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = call @llvm.riscv.vadd.mask.nxv2i32.nxv2i32.i64( poison, [[TMP5]], [[CAST_SCALABLE]], [[TMP4]], i64 8, i64 3) +// CHECK-NEXT: [[CAST_FIXED:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TMP7]], i64 0) // CHECK-NEXT: store <8 x i32> [[CAST_FIXED]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8 -// CHECK-NEXT: [[CAST_SCALABLE2:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TMP9]], i64 0) -// CHECK-NEXT: ret [[CAST_SCALABLE2]] +// CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8 +// CHECK-NEXT: [[CAST_SCALABLE1:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( poison, <8 x i32> [[TMP8]], i64 0) +// CHECK-NEXT: ret [[CAST_SCALABLE1]] // fixed_int32m1_t test_bool32(vbool32_t m, vint32m1_t vec) { vbool32_t mask = __riscv_vmand(m, global_bool32, __riscv_v_fixed_vlen/32); @@ -224,16 +224,15 @@ fixed_bool4_t address_of_array_idx_bool4() { // CHECK-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 // CHECK-NEXT: [[ARR:%.*]] = alloca [3 x <1 x i8>], align 1 // CHECK-NEXT: [[PARR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 1 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i8>], ptr [[ARR]], i64 0, i64 0 // CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[PARR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PARR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[TMP0]], align 1 // CHECK-NEXT: store <1 x i8> [[TMP1]], ptr [[RETVAL]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 -// CHECK-NEXT: [[CAST_SCALABLE:%.*]] = call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP2]], i64 0) -// CHECK-NEXT: [[TMP3:%.*]] = bitcast 
[[CAST_SCALABLE]] to -// CHECK-NEXT: [[TMP4:%.*]] = call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP3]], i64 0) -// CHECK-NEXT: ret [[TMP4]] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL_COERCE]], ptr align 1 [[RETVAL]], i64 1, i1 false) +// CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[RETVAL_COERCE]], align 1 +// CHECK-NEXT: ret [[TMP2]] // fixed_bool32_t address_of_array_idx_bool32() { fixed_bool32_t arr[3]; diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c index 92ba27fb65425..4bd6311e05b03 100644 --- a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-globals.c @@ -89,10 +89,10 @@ void write_global_bool4(vbool4_t v) { global_bool4 = v; } #if __riscv_v_fixed_vlen >= 256 // CHECK-256-LABEL: @write_global_bool32( // CHECK-256-NEXT: entry: -// CHECK-256-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.insert.nxv8i1.nxv2i1( zeroinitializer, [[V:%.*]], i64 0) -// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[TMP0]] to -// CHECK-256-NEXT: [[CAST_FIXED:%.*]] = tail call <1 x i8> @llvm.vector.extract.v1i8.nxv1i8( [[TMP1]], i64 0) -// CHECK-256-NEXT: store <1 x i8> [[CAST_FIXED]], ptr @global_bool32, align 1, !tbaa [[TBAA6]] +// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca , align 1 +// CHECK-256-NEXT: store [[V:%.*]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA9:![0-9]+]] +// CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-256-NEXT: store <1 x i8> [[TMP0]], ptr @global_bool32, align 1, !tbaa [[TBAA6]] // CHECK-256-NEXT: ret void // void write_global_bool32(vbool32_t v) { global_bool32 = v; } @@ -151,11 +151,11 @@ vbool4_t read_global_bool4() { return global_bool4; } #if __riscv_v_fixed_vlen >= 256 // CHECK-256-LABEL: @read_global_bool32( // CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <1 x i8>, align 1 // CHECK-256-NEXT: [[TMP0:%.*]] = load <1 x i8>, 
ptr @global_bool32, align 1, !tbaa [[TBAA6]] -// CHECK-256-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv1i8.v1i8( poison, <1 x i8> [[TMP0]], i64 0) -// CHECK-256-NEXT: [[TMP1:%.*]] = bitcast [[CAST_SCALABLE]] to -// CHECK-256-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i1.nxv8i1( [[TMP1]], i64 0) -// CHECK-256-NEXT: ret [[TMP2]] +// CHECK-256-NEXT: store <1 x i8> [[TMP0]], ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-256-NEXT: [[TMP1:%.*]] = load , ptr [[SAVED_VALUE]], align 1, !tbaa [[TBAA6]] +// CHECK-256-NEXT: ret [[TMP1]] // vbool32_t read_global_bool32() { return global_bool32; } #endif diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c index 9385b537f18b3..d9e7b5d4707d8 100644 --- a/clang/test/CodeGen/arm-mfp8.c +++ b/clang/test/CodeGen/arm-mfp8.c @@ -38,22 +38,34 @@ mfloat8x8_t test_ret_mfloat8x8_t(mfloat8x8_t v) { // CHECK-C-LABEL: define dso_local <1 x i8> @func1n( // CHECK-C-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-C-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-C-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-C-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-C-NEXT: 
[[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-C-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-C-NEXT: ret <1 x i8> [[TMP2]] // // CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z6func1nu6__mfp8( // CHECK-CXX-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[ARRAYIDX]], align 1 -// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 -// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 -// CHECK-CXX-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-CXX-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 +// CHECK-CXX-NEXT: [[MFP8_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x i8], align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: store i8 [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i8], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +// CHECK-CXX-NEXT: store i8 [[TMP1]], ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: [[TMP2:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 +// CHECK-CXX-NEXT: ret <1 x i8> [[TMP2]] // __mfp8 func1n(__mfp8 mfp8) { __mfp8 f1n[10]; @@ -86,14 +98,18 @@ mfloat8_t test_extract_element(mfloat8x16_t x, int i) { // CHECK-C-LABEL: define dso_local <16 x i8> @test_insert_element( // CHECK-C-SAME: <16 x 
i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-C-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-C-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-C-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-C-NEXT: ret <16 x i8> [[VECINS]] // // CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z19test_insert_element14__Mfloat8x16_tiu6__mfp8( // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]], <1 x i8> [[V:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <1 x i8> [[V]] to i8 +// CHECK-CXX-NEXT: [[V_ADDR:%.*]] = alloca i8, align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[V]], ptr [[V_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[V_ADDR]], align 1 // CHECK-CXX-NEXT: [[VECINS:%.*]] = insertelement <16 x i8> [[X]], i8 [[TMP0]], i32 [[I]] // CHECK-CXX-NEXT: ret <16 x i8> [[VECINS]] // diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c index 86c2812434643..0913295b0c5f5 100644 --- a/clang/test/CodeGen/builtins-arm64.c +++ b/clang/test/CodeGen/builtins-arm64.c @@ -10,7 +10,7 @@ void f0(void *a, void *b) { void *tp (void) { return __builtin_thread_pointer (); -// CHECK-LINUX: call {{.*}} @llvm.thread.pointer.p0() +// CHECK-LINUX: call {{.*}} @llvm.thread.pointer() } // CHECK: call {{.*}} @llvm.bitreverse.i32(i32 %a) diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index d8aff82b0c140..4a44a9a88df11 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -749,5 +749,5 @@ int externref_is_null(__externref_t arg) { void *tp (void) { return __builtin_thread_pointer (); - // WEBASSEMBLY: call {{.*}} @llvm.thread.pointer.p0() + // WEBASSEMBLY: call {{.*}} 
@llvm.thread.pointer() } diff --git a/clang/test/CodeGen/dllexport.c b/clang/test/CodeGen/dllexport.c index f64bcb5393005..4c1143cf5ca48 100644 --- a/clang/test/CodeGen/dllexport.c +++ b/clang/test/CodeGen/dllexport.c @@ -2,8 +2,6 @@ // RUN: %clang_cc1 -triple x86_64-windows-msvc -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck %s // RUN: %clang_cc1 -triple i686-windows-gnu -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck %s // RUN: %clang_cc1 -triple x86_64-windows-gnu -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck %s diff --git a/clang/test/CodeGen/dllimport.c b/clang/test/CodeGen/dllimport.c index 1631c6dc56805..6170c8c4a66a3 100644 --- a/clang/test/CodeGen/dllimport.c +++ b/clang/test/CodeGen/dllimport.c @@ -2,11 +2,8 @@ // RUN: %clang_cc1 -triple x86_64-windows-msvc -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=MS %s // RUN: %clang_cc1 -triple i686-windows-gnu -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GNU %s // RUN: %clang_cc1 -triple x86_64-windows-gnu -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GNU %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GNU %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fms-extensions -emit-llvm -std=c11 -O0 -o - %s | FileCheck --check-prefix=CHECK --check-prefix=GNU %s // RUN: %clang_cc1 -triple i686-windows-msvc -fms-extensions -emit-llvm -std=c11 -O1 -fno-inline -o - %s | FileCheck --check-prefix=O1 --check-prefix=MO1 %s // RUN: %clang_cc1 -triple i686-windows-gnu -fms-extensions -emit-llvm -std=c11 -O1 -fno-inline -o - %s | FileCheck 
--check-prefix=O1 --check-prefix=GO1 %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -fms-extensions -emit-llvm -std=c11 -O1 -fno-inline -o - %s | FileCheck --check-prefix=O1 --check-prefix=GO1 %s #define JOIN2(x, y) x##y #define JOIN(x, y) JOIN2(x, y) diff --git a/clang/test/CodeGen/dso-local-executable.c b/clang/test/CodeGen/dso-local-executable.c index 880273df137d7..15575d3927f23 100644 --- a/clang/test/CodeGen/dso-local-executable.c +++ b/clang/test/CodeGen/dso-local-executable.c @@ -12,9 +12,6 @@ // RUN: %clang_cc1 -triple x86_64-w64-mingw32 -emit-llvm %s -o - | FileCheck --check-prefixes=MINGW,MINGW-NATIVE_TLS,MINGW-AUTO-IMPORT %s // RUN: %clang_cc1 -triple x86_64-w64-mingw32 -emit-llvm %s -o - -fno-auto-import | FileCheck --check-prefixes=MINGW,MINGW-NATIVE_TLS,MINGW-NO-AUTO-IMPORT %s // RUN: %clang_cc1 -triple x86_64-w64-mingw32 -emit-llvm %s -o - -femulated-tls | FileCheck --check-prefixes=MINGW,MINGW-EMUTLS,MINGW-AUTO-IMPORT %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -emit-llvm %s -o - | FileCheck --check-prefixes=MINGW,MINGW-NATIVE_TLS,MINGW-AUTO-IMPORT %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -emit-llvm %s -o - -fno-auto-import | FileCheck --check-prefixes=MINGW,MINGW-NATIVE_TLS,MINGW-NO-AUTO-IMPORT %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -emit-llvm %s -o - -femulated-tls | FileCheck --check-prefixes=MINGW,MINGW-EMUTLS,MINGW-AUTO-IMPORT %s // MINGW: @baz = dso_local global i32 42 // MINGW-NEXT: @import_var = external dllimport global i32 // MINGW-NEXT: @weak_bar = extern_weak global i32 diff --git a/clang/test/CodeGenCXX/dllexport-members.cpp b/clang/test/CodeGenCXX/dllexport-members.cpp index 3753050cbf7d5..e4effa4c72c72 100644 --- a/clang/test/CodeGenCXX/dllexport-members.cpp +++ b/clang/test/CodeGenCXX/dllexport-members.cpp @@ -4,8 +4,6 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-windows-msvc -fms-compatibility -fms-compatibility-version=19 -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=M64VS2015 
%s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-gnu -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=G32 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-windows-gnu -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=G64 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-pc-cygwin -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=C32 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-pc-cygwin -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=G64 %s // Helper structs to make templates more expressive. struct ImplicitInst_Exported {}; @@ -37,16 +35,12 @@ struct ExportMembers { // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?normalInlineDecl@ExportMembers@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"?normalInlineDecl@ExportMembers@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers9normalDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers9normalDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers9normalDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers13normalInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers13normalInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers13normalInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers15normalInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers15normalInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void 
@_ZN13ExportMembers15normalInlineDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers16normalInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers16normalInlineDeclEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers16normalInlineDeclEv(ptr {{[^,]*}} %this) // M32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"?referencedNonExportedInClass@ExportMembers@@QAEXXZ" __declspec(dllexport) void normalDef(); @@ -64,16 +58,12 @@ struct ExportMembers { // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?virtualInlineDecl@ExportMembers@@UAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"?virtualInlineDecl@ExportMembers@@UEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers10virtualDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers10virtualDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers10virtualDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers14virtualInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers14virtualInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers14virtualInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers16virtualInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers16virtualInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers16virtualInlineDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void 
@_ZN13ExportMembers17virtualInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers17virtualInlineDeclEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers17virtualInlineDeclEv(ptr {{[^,]*}} %this) __declspec(dllexport) virtual void virtualDef(); __declspec(dllexport) virtual void virtualInclass() {} @@ -96,7 +86,6 @@ struct ExportMembers { // M32-DAG: define dso_local dllexport x86_thiscallcc void @"?protectedDef@ExportMembers@@IAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local dllexport void @"?protectedDef@ExportMembers@@IEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers12protectedDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers12protectedDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers12protectedDefEv(ptr {{[^,]*}} %this) // MSC-DAG: define dso_local dllexport void @"?protectedStaticDef@ExportMembers@@KAXXZ"() // GNU-DAG: define dso_local dllexport void @_ZN13ExportMembers18protectedStaticDefEv() @@ -107,7 +96,6 @@ struct ExportMembers { // M32-DAG: define dso_local dllexport x86_thiscallcc void @"?privateDef@ExportMembers@@AAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local dllexport void @"?privateDef@ExportMembers@@AEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers10privateDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers10privateDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers10privateDefEv(ptr {{[^,]*}} %this) // MSC-DAG: define dso_local dllexport void @"?privateStaticDef@ExportMembers@@CAXXZ"() // GNU-DAG: define dso_local dllexport void @_ZN13ExportMembers16privateStaticDefEv() @@ -118,7 +106,6 @@ struct ExportMembers { // M32-DAG: define dso_local x86_thiscallcc 
void @"?ignored@ExportMembers@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local void @"?ignored@ExportMembers@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local x86_thiscallcc void @_ZN13ExportMembers7ignoredEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local void @_ZN13ExportMembers7ignoredEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local void @_ZN13ExportMembers7ignoredEv(ptr {{[^,]*}} %this) public: void ignored(); @@ -176,16 +163,12 @@ struct ExportMembers::Nested { // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?normalInlineDecl@Nested@ExportMembers@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"?normalInlineDecl@Nested@ExportMembers@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested9normalDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested9normalDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested9normalDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested13normalInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested13normalInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested13normalInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested15normalInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested15normalInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested15normalInlineDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested16normalInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define 
weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested16normalInlineDeclEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested16normalInlineDeclEv(ptr {{[^,]*}} %this) __declspec(dllexport) void normalDef(); __declspec(dllexport) void normalInclass() {} @@ -201,16 +184,12 @@ struct ExportMembers::Nested { // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?virtualInlineDecl@Nested@ExportMembers@@UAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"?virtualInlineDecl@Nested@ExportMembers@@UEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested10virtualDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested10virtualDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested10virtualDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested14virtualInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested14virtualInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested14virtualInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested16virtualInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested16virtualInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested16virtualInlineDefEv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested17virtualInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested17virtualInlineDeclEv(ptr {{[^,]*}} %this) // G64-DAG: define 
weak_odr dso_local dllexport void @_ZN13ExportMembers6Nested17virtualInlineDeclEv(ptr {{[^,]*}} %this) __declspec(dllexport) virtual void virtualDef(); __declspec(dllexport) virtual void virtualInclass() {} @@ -233,7 +212,6 @@ struct ExportMembers::Nested { // M32-DAG: define dso_local dllexport x86_thiscallcc void @"?protectedDef@Nested@ExportMembers@@IAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local dllexport void @"?protectedDef@Nested@ExportMembers@@IEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested12protectedDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested12protectedDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested12protectedDefEv(ptr {{[^,]*}} %this) // MSC-DAG: define dso_local dllexport void @"?protectedStaticDef@Nested@ExportMembers@@KAXXZ"() // GNU-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested18protectedStaticDefEv() @@ -244,7 +222,6 @@ struct ExportMembers::Nested { // M32-DAG: define dso_local dllexport x86_thiscallcc void @"?privateDef@Nested@ExportMembers@@AAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local dllexport void @"?privateDef@Nested@ExportMembers@@AEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN13ExportMembers6Nested10privateDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested10privateDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested10privateDefEv(ptr {{[^,]*}} %this) // MSC-DAG: define dso_local dllexport void @"?privateStaticDef@Nested@ExportMembers@@CAXXZ"() // GNU-DAG: define dso_local dllexport void @_ZN13ExportMembers6Nested16privateStaticDefEv() @@ -255,7 +232,6 @@ struct ExportMembers::Nested { // M32-DAG: define dso_local x86_thiscallcc void @"?ignored@Nested@ExportMembers@@QAEXXZ"(ptr {{[^,]*}} %this) 
// M64-DAG: define dso_local void @"?ignored@Nested@ExportMembers@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local x86_thiscallcc void @_ZN13ExportMembers6Nested7ignoredEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local void @_ZN13ExportMembers6Nested7ignoredEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local void @_ZN13ExportMembers6Nested7ignoredEv(ptr {{[^,]*}} %this) public: void ignored(); @@ -307,54 +283,44 @@ struct ExportSpecials { // M32-DAG: define dso_local dllexport x86_thiscallcc ptr @"??0ExportSpecials@@QAE@XZ"(ptr {{[^,]*}} returned {{[^,]*}} %this) // M64-DAG: define dso_local dllexport ptr @"??0ExportSpecials@@QEAA@XZ"(ptr {{[^,]*}} returned {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1Ev(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC1Ev(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC1Ev(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2Ev(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC2Ev(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC2Ev(ptr {{[^,]*}} %this) __declspec(dllexport) ExportSpecials(); // M32-DAG: define dso_local dllexport x86_thiscallcc void @"??1ExportSpecials@@QAE@XZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local dllexport void @"??1ExportSpecials@@QEAA@XZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsD1Ev(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsD1Ev(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsD1Ev(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsD2Ev(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsD2Ev(ptr {{[^,]*}} 
%this) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsD2Ev(ptr {{[^,]*}} %this) __declspec(dllexport) ~ExportSpecials(); // M32-DAG: define dso_local dllexport x86_thiscallcc ptr @"??0ExportSpecials@@QAE@ABU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport ptr @"??0ExportSpecials@@QEAA@AEBU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC2ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC2ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllexport) ExportSpecials(const ExportSpecials&); // M32-DAG: define dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportSpecials@@QAEAAU0@ABU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportSpecials@@QEAAAEAU0@AEBU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define 
dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ExportSpecialsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ExportSpecialsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ExportSpecialsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllexport) ExportSpecials& operator=(const ExportSpecials&); // M32-DAG: define dso_local dllexport x86_thiscallcc ptr @"??0ExportSpecials@@QAE@$$QAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport ptr @"??0ExportSpecials@@QEAA@$$QEAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN14ExportSpecialsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport void @_ZN14ExportSpecialsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} 
dereferenceable({{[0-9]+}}) %0) __declspec(dllexport) ExportSpecials(ExportSpecials&&); // M32-DAG: define dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportSpecials@@QAEAAU0@$$QAU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportSpecials@@QEAAAEAU0@$$QEAU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ExportSpecialsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ExportSpecialsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ExportSpecialsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllexport) ExportSpecials& operator=(ExportSpecials&&); }; @@ -371,42 +337,36 @@ struct ExportInlineSpecials { // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc ptr @"??0ExportInlineSpecials@@QAE@XZ"(ptr {{[^,]*}} returned {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport ptr @"??0ExportInlineSpecials@@QEAA@XZ"( // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1Ev( - // C32-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsC1Ev( // G64-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsC1Ev( __declspec(dllexport) ExportInlineSpecials() {} // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??1ExportInlineSpecials@@QAE@XZ"( // M64-DAG: define weak_odr dso_local dllexport 
void @"??1ExportInlineSpecials@@QEAA@XZ"( // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsD1Ev( - // C32-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsD1Ev( // G64-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsD1Ev( __declspec(dllexport) ~ExportInlineSpecials() {} // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc ptr @"??0ExportInlineSpecials@@QAE@ABU0@@Z"( // M64-DAG: define weak_odr dso_local dllexport ptr @"??0ExportInlineSpecials@@QEAA@AEBU0@@Z"( // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1ERKS_( - // C32-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsC1ERKS_( // G64-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsC1ERKS_( __declspec(dllexport) inline ExportInlineSpecials(const ExportInlineSpecials&); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportInlineSpecials@@QAEAAU0@ABU0@@Z"( // M64-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportInlineSpecials@@QEAAAEAU0@AEBU0@@Z"( // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ExportInlineSpecialsaSERKS_( - // C32-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ExportInlineSpecialsaSERKS_( // G64-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ExportInlineSpecialsaSERKS_( __declspec(dllexport) ExportInlineSpecials& operator=(const ExportInlineSpecials&); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc ptr @"??0ExportInlineSpecials@@QAE@$$QAU0@@Z"( // M64-DAG: define weak_odr dso_local dllexport ptr @"??0ExportInlineSpecials@@QEAA@$$QEAU0@@Z"( // G32-DAG: define 
weak_odr dso_local dllexport x86_thiscallcc void @_ZN20ExportInlineSpecialsC1EOS_( - // C32-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsC1EOS_( // G64-DAG: define weak_odr dso_local dllexport void @_ZN20ExportInlineSpecialsC1EOS_( __declspec(dllexport) ExportInlineSpecials(ExportInlineSpecials&&) {} // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportInlineSpecials@@QAEAAU0@$$QAU0@@Z"( // M64-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportInlineSpecials@@QEAAAEAU0@$$QEAU0@@Z"( // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ExportInlineSpecialsaSEOS_( - // C32-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ExportInlineSpecialsaSEOS_( // G64-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ExportInlineSpecialsaSEOS_( __declspec(dllexport) ExportInlineSpecials& operator=(ExportInlineSpecials&&) { return *this; } }; @@ -427,54 +387,44 @@ struct ExportDefaultedDefs { // M32-DAG: define dso_local dllexport x86_thiscallcc ptr @"??0ExportDefaultedDefs@@QAE@XZ"(ptr {{[^,]*}} returned {{[^,]*}} %this) // M64-DAG: define dso_local dllexport ptr @"??0ExportDefaultedDefs@@QEAA@XZ"(ptr {{[^,]*}} returned {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1Ev(ptr {{[^,]*}} %this) -// C32-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC1Ev(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC1Ev(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2Ev(ptr {{[^,]*}} %this) -// C32-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC2Ev(ptr {{[^,]*}} %this) // 
G64-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC2Ev(ptr {{[^,]*}} %this) __declspec(dllexport) ExportDefaultedDefs::ExportDefaultedDefs() = default; // M32-DAG: define dso_local dllexport x86_thiscallcc void @"??1ExportDefaultedDefs@@QAE@XZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local dllexport void @"??1ExportDefaultedDefs@@QEAA@XZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsD1Ev(ptr {{[^,]*}} %this) -// C32-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsD1Ev(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsD1Ev(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsD2Ev(ptr {{[^,]*}} %this) -// C32-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsD2Ev(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsD2Ev(ptr {{[^,]*}} %this) ExportDefaultedDefs::~ExportDefaultedDefs() = default; // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc ptr @"??0ExportDefaultedDefs@@QAE@ABU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define weak_odr dso_local dllexport ptr @"??0ExportDefaultedDefs@@QEAA@AEBU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define weak_odr dso_local dllexport void @_ZN19ExportDefaultedDefsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define weak_odr dso_local dllexport void @_ZN19ExportDefaultedDefsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define weak_odr dso_local 
dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define weak_odr dso_local dllexport void @_ZN19ExportDefaultedDefsC2ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define weak_odr dso_local dllexport void @_ZN19ExportDefaultedDefsC2ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllexport) ExportDefaultedDefs::ExportDefaultedDefs(const ExportDefaultedDefs&) = default; // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportDefaultedDefs@@QAEAAU0@ABU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportDefaultedDefs@@QEAAAEAU0@AEBU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ExportDefaultedDefsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ExportDefaultedDefsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define weak_odr dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ExportDefaultedDefsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) inline ExportDefaultedDefs& ExportDefaultedDefs::operator=(const ExportDefaultedDefs&) = default; // M32-DAG: define dso_local dllexport x86_thiscallcc ptr @"??0ExportDefaultedDefs@@QAE@$$QAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align 
{{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport ptr @"??0ExportDefaultedDefs@@QEAA@$$QEAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN19ExportDefaultedDefsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport void @_ZN19ExportDefaultedDefsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllexport) ExportDefaultedDefs::ExportDefaultedDefs(ExportDefaultedDefs&&) = default; // M32-DAG: define dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportDefaultedDefs@@QAEAAU0@$$QAU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ExportDefaultedDefs@@QEAAAEAU0@$$QEAU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ExportDefaultedDefsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// 
C32-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ExportDefaultedDefsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ExportDefaultedDefsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) ExportDefaultedDefs& ExportDefaultedDefs::operator=(ExportDefaultedDefs&&) = default; @@ -516,28 +466,24 @@ struct ExportAlloc { // M32-DAG: define dso_local dllexport ptr @"??2ExportAlloc@@SAPAXI@Z"(i32 %n) // M64-DAG: define dso_local dllexport ptr @"??2ExportAlloc@@SAPEAX_K@Z"(i64 %n) // G32-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnwEj(i32 %n) -// C32-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnwEj(i32 %n) -// G64-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnwE{{[ym]}}(i64 %n) +// G64-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnwEy(i64 %n) void* ExportAlloc::operator new(__SIZE_TYPE__ n) { return malloc(n); } // M32-DAG: define dso_local dllexport ptr @"??_UExportAlloc@@SAPAXI@Z"(i32 %n) // M64-DAG: define dso_local dllexport ptr @"??_UExportAlloc@@SAPEAX_K@Z"(i64 %n) // G32-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnaEj(i32 %n) -// C32-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnaEj(i32 %n) -// G64-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnaE{{[ym]}}(i64 %n) +// G64-DAG: define dso_local dllexport ptr @_ZN11ExportAllocnaEy(i64 %n) void* ExportAlloc::operator new[](__SIZE_TYPE__ n) { return malloc(n); } // M32-DAG: define dso_local dllexport void @"??3ExportAlloc@@SAXPAX@Z"(ptr %p) // M64-DAG: define dso_local dllexport void @"??3ExportAlloc@@SAXPEAX@Z"(ptr %p) // G32-DAG: define dso_local dllexport void @_ZN11ExportAllocdlEPv(ptr %p) -// C32-DAG: define dso_local dllexport void @_ZN11ExportAllocdlEPv(ptr %p) // G64-DAG: define dso_local dllexport void @_ZN11ExportAllocdlEPv(ptr 
%p) void ExportAlloc::operator delete(void* p) { free(p); } // M32-DAG: define dso_local dllexport void @"??_VExportAlloc@@SAXPAX@Z"(ptr %p) // M64-DAG: define dso_local dllexport void @"??_VExportAlloc@@SAXPEAX@Z"(ptr %p) // G32-DAG: define dso_local dllexport void @_ZN11ExportAllocdaEPv(ptr %p) -// C32-DAG: define dso_local dllexport void @_ZN11ExportAllocdaEPv(ptr %p) // G64-DAG: define dso_local dllexport void @_ZN11ExportAllocdaEPv(ptr %p) void ExportAlloc::operator delete[](void* p) { free(p); } @@ -558,7 +504,6 @@ void useMemFunTmpl() { // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??$exportedNormal@UImplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"??$exportedNormal@UImplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ImplicitInst_ExportedEEvv(ptr {{[^,]*}} %this) - // C32-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI21ImplicitInst_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI21ImplicitInst_ExportedEEvv(ptr {{[^,]*}} %this) MemFunTmpl().exportedNormal(); @@ -573,7 +518,6 @@ void useMemFunTmpl() { // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??$exportedNormal@UExplicitDecl_Exported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"??$exportedNormal@UExplicitDecl_Exported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ExplicitDecl_ExportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI21ExplicitDecl_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void 
@_ZN10MemFunTmpl14exportedNormalI21ExplicitDecl_ExportedEEvv(ptr {{[^,]*}} %this) extern template void MemFunTmpl::exportedNormal(); template void MemFunTmpl::exportedNormal(); @@ -589,7 +533,6 @@ extern template void MemFunTmpl::exportedStatic(); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??$exportedNormal@UExplicitInst_Exported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"??$exportedNormal@UExplicitInst_Exported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI21ExplicitInst_ExportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI21ExplicitInst_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI21ExplicitInst_ExportedEEvv(ptr {{[^,]*}} %this) template void MemFunTmpl::exportedNormal(); @@ -602,14 +545,12 @@ template void MemFunTmpl::exportedStatic(); // M32-DAG: define dso_local dllexport x86_thiscallcc void @"??$exportedNormal@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local dllexport void @"??$exportedNormal@UExplicitSpec_Def_Exported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI25ExplicitSpec_Def_ExportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI25ExplicitSpec_Def_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI25ExplicitSpec_Def_ExportedEEvv(ptr {{[^,]*}} %this) template<> __declspec(dllexport) void MemFunTmpl::exportedNormal() {} // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??$exportedNormal@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}} %this) // 
M64-DAG: define weak_odr dso_local dllexport void @"??$exportedNormal@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl14exportedNormalI31ExplicitSpec_InlineDef_ExportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI31ExplicitSpec_InlineDef_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl14exportedNormalI31ExplicitSpec_InlineDef_ExportedEEvv(ptr {{[^,]*}} %this) template<> __declspec(dllexport) inline void MemFunTmpl::exportedNormal() {} @@ -627,7 +568,6 @@ template<> __declspec(dllexport) inline void MemFunTmpl::exportedStatic void MemFunTmpl::exportedNormal() {} @@ -641,7 +581,6 @@ template<> void MemFunTmpl::exportedStatic() {} // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??$normalDef@UExplicitDecl_Exported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"??$normalDef@UExplicitDecl_Exported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ExportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ExportedEEvv(ptr {{[^,]*}} %this) extern template __declspec(dllexport) void MemFunTmpl::normalDef(); template __declspec(dllexport) void MemFunTmpl::normalDef(); @@ -657,7 +596,6 @@ extern template __declspec(dllexport) void MemFunTmpl::staticDef(); @@ -672,10 +610,8 @@ template __declspec(dllexport) void MemFunTmpl::staticDef // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"??$normalDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QAEXXZ"(ptr 
{{[^,]*}} %this) // M64-DAG: define weak_odr dso_local dllexport void @"??$normalDef@UExplicitSpec_InlineDef_Exported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI25ExplicitSpec_Def_ExportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define dso_local dllexport void @_ZN10MemFunTmpl9normalDefI25ExplicitSpec_Def_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local dllexport void @_ZN10MemFunTmpl9normalDefI25ExplicitSpec_Def_ExportedEEvv(ptr {{[^,]*}} %this) // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ExportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ExportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local dllexport void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ExportedEEvv(ptr {{[^,]*}} %this) template<> __declspec(dllexport) void MemFunTmpl::normalDef() {} template<> __declspec(dllexport) inline void MemFunTmpl::normalDef() {} @@ -756,10 +692,8 @@ template struct ClassTmplMem { // MSVC exports explicit specialization of exported class template member function; MinGW does not. 
// M32-DAG: define dso_local dllexport x86_thiscallcc void @"?exportedNormal@?$ClassTmplMem@H@@QAEXXZ" // G32-DAG: define dso_local x86_thiscallcc void @_ZN12ClassTmplMemIiE14exportedNormalEv -// C32-DAG: define dso_local void @_ZN12ClassTmplMemIiE14exportedNormalEv template<> void ClassTmplMem::exportedNormal() {} // M32-DAG: define dso_local dllexport void @"?exportedStatic@?$ClassTmplMem@H@@SAXXZ" // G32-DAG: define dso_local void @_ZN12ClassTmplMemIiE14exportedStaticEv -// C32-DAG: define dso_local void @_ZN12ClassTmplMemIiE14exportedStaticEv template<> void ClassTmplMem::exportedStatic() {} diff --git a/clang/test/CodeGenCXX/dllexport-missing-key.cpp b/clang/test/CodeGenCXX/dllexport-missing-key.cpp index 505679f315331..90e736f6fad3a 100644 --- a/clang/test/CodeGenCXX/dllexport-missing-key.cpp +++ b/clang/test/CodeGenCXX/dllexport-missing-key.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm -std=c++11 -o - %s | FileCheck --check-prefix=GNU %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -emit-llvm -std=c++11 -o - %s | FileCheck --check-prefix=GNU %s class __declspec(dllexport) QAbstractLayoutStyleInfo { public: diff --git a/clang/test/CodeGenCXX/dllexport.cpp b/clang/test/CodeGenCXX/dllexport.cpp index dfbb2762ac85c..c8ac526f4cbe3 100644 --- a/clang/test/CodeGenCXX/dllexport.cpp +++ b/clang/test/CodeGenCXX/dllexport.cpp @@ -6,10 +6,8 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-gnu -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck -allow-deprecated-dag-overlap --check-prefix=GNU --check-prefix=G32 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-windows-gnu -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck -allow-deprecated-dag-overlap --check-prefix=GNU %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-pc-cygwin -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck 
-allow-deprecated-dag-overlap --check-prefix=GNU --check-prefix=C32 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-pc-cygwin -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck -allow-deprecated-dag-overlap --check-prefix=GNU %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-scei-ps4 -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck -allow-deprecated-dag-overlap --check-prefix=PS %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-sie-ps5 -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck -allow-deprecated-dag-overlap --check-prefix=PS %s +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-scei-ps4 -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck -allow-deprecated-dag-overlap --check-prefix=PS %s +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-sie-ps5 -emit-llvm -std=c++1y -fno-threadsafe-statics -fms-extensions -O0 -o - %s -w | FileCheck -allow-deprecated-dag-overlap --check-prefix=PS %s // Helper structs to make templates more expressive. struct ImplicitInst_Exported {}; @@ -310,7 +308,7 @@ void Befriended::func() {} // Implicit declarations can be redeclared with dllexport. 
// MSC-DAG: define dso_local dllexport nonnull ptr @"??2@{{YAPAXI|YAPEAX_K}}@Z"( -// GNU-DAG: define dso_local dllexport nonnull ptr @_Znw{{[yjm]}}( +// GNU-DAG: define dso_local dllexport nonnull ptr @_Znw{{[yj]}}( void* alloc(__SIZE_TYPE__ n); __declspec(dllexport) void* operator new(__SIZE_TYPE__ n) { return alloc(n); } @@ -618,7 +616,6 @@ void W::foo() {} // M32-DAG: [[W_VTABLE:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4W@@6B@", ptr @"?foo@W@@UAEXXZ"] }, comdat($"??_7W@@6B@") // M32-DAG: @"??_7W@@6B@" = dllexport unnamed_addr alias ptr, getelementptr inbounds ({ [2 x ptr] }, ptr [[W_VTABLE]], i32 0, i32 0, i32 1) // G32-DAG: @_ZTV1W = dso_local dllexport unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI1W, ptr @_ZN1W3fooEv] } -// C32-DAG: @_ZTV1W = dso_local dllexport unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI1W, ptr @_ZN1W3fooEv] } struct __declspec(dllexport) X : public virtual W {}; // vbtable: @@ -702,7 +699,6 @@ template void PartiallySpecializedClassTemplate::f() {} USEMEMFUNC(PartiallySpecializedClassTemplate, f); // M32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"?f@?$PartiallySpecializedClassTemplate@PAX@@QAEXXZ" // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN33PartiallySpecializedClassTemplateIPvE1fEv -// C32-DAG: define weak_odr dso_local dllexport void @_ZN33PartiallySpecializedClassTemplateIPvE1fEv // Attributes on explicit specializations are honored. 
template struct ExplicitlySpecializedClassTemplate {}; @@ -711,7 +707,6 @@ void ExplicitlySpecializedClassTemplate::f() {} USEMEMFUNC(ExplicitlySpecializedClassTemplate, f); // M32-DAG: define dso_local dllexport x86_thiscallcc void @"?f@?$ExplicitlySpecializedClassTemplate@PAX@@QAEXXZ" // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN34ExplicitlySpecializedClassTemplateIPvE1fEv -// C32-DAG: define dso_local dllexport void @_ZN34ExplicitlySpecializedClassTemplateIPvE1fEv // MS inherits DLL attributes to partial specializations. template struct __declspec(dllexport) PartiallySpecializedExportedClassTemplate {}; @@ -719,7 +714,6 @@ template struct PartiallySpecializedExportedClassTemplate { voi USEMEMFUNC(PartiallySpecializedExportedClassTemplate, f); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?f@?$PartiallySpecializedExportedClassTemplate@PAX@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN41PartiallySpecializedExportedClassTemplateIPvE1fEv -// C32-DAG: define linkonce_odr dso_local void @_ZN41PartiallySpecializedExportedClassTemplateIPvE1fEv // MS ignores DLL attributes on partial specializations; inheritance still works though. template struct __declspec(dllexport) PartiallySpecializedExportedClassTemplate2 {}; @@ -728,7 +722,6 @@ template void PartiallySpecializedExportedClassTemplate2::f() { USEMEMFUNC(PartiallySpecializedExportedClassTemplate2, f); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?f@?$PartiallySpecializedExportedClassTemplate2@PAX@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN42PartiallySpecializedExportedClassTemplate2IPvE1fEv -// C32-DAG: declare dllimport void @_ZN42PartiallySpecializedExportedClassTemplate2IPvE1fEv // Attributes on the instantiation take precedence over attributes on the template. 
template struct __declspec(dllimport) ExplicitlyInstantiatedWithDifferentAttr { void f() {} }; @@ -778,7 +771,6 @@ USEMEMFUNC(ExplicitInstantiationDeclExportedDefTemplate, f); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?f@?$ExplicitInstantiationDeclExportedDefTemplate@H@@QAEXXZ" // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc ptr @"??0?$ExplicitInstantiationDeclExportedDefTemplate@H@@QAE@XZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN44ExplicitInstantiationDeclExportedDefTemplateIiE1fEv -// C32-DAG: define weak_odr dso_local void @_ZN44ExplicitInstantiationDeclExportedDefTemplateIiE1fEv template struct ImplicitInstantiationExportedExplicitInstantiationDefTemplate { virtual void f() {} }; ImplicitInstantiationExportedExplicitInstantiationDefTemplate ImplicitInstantiationExportedExplicitInstantiationDefTemplateInstance; @@ -786,7 +778,6 @@ template struct __declspec(dllexport) ImplicitInstantiationExportedExplicitInsta USEMEMFUNC(ImplicitInstantiationExportedExplicitInstantiationDefTemplate, f); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?f@?$ImplicitInstantiationExportedExplicitInstantiationDefTemplate@H@@UAEXXZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN61ImplicitInstantiationExportedExplicitInstantiationDefTemplateIiE1fEv -// C32-DAG: define weak_odr dso_local void @_ZN61ImplicitInstantiationExportedExplicitInstantiationDefTemplateIiE1fEv template struct __declspec(dllexport) ImplicitInstantiationExplicitInstantiationDefExportedTemplate { virtual void f() {} }; ImplicitInstantiationExplicitInstantiationDefExportedTemplate ImplicitInstantiationExplicitInstantiationDefExportedTemplateInstance; @@ -794,7 +785,6 @@ template struct ImplicitInstantiationExplicitInstantiationDefExportedTemplate, f); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?f@?$ImplicitInstantiationExplicitInstantiationDefExportedTemplate@H@@UAEXXZ" // G32-DAG: define 
weak_odr dso_local x86_thiscallcc void @_ZN61ImplicitInstantiationExplicitInstantiationDefExportedTemplateIiE1fEv -// C32-DAG: define weak_odr dso_local void @_ZN61ImplicitInstantiationExplicitInstantiationDefExportedTemplateIiE1fEv template struct __declspec(dllexport) ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate { virtual void f() {} }; ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplateInstance; @@ -802,7 +792,6 @@ template struct __declspec(dllexport) ImplicitInstantiationExportedExplicitInsta USEMEMFUNC(ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate, f); // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?f@?$ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplate@H@@UAEXXZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN69ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplateIiE1fEv -// C32-DAG: define weak_odr dso_local void @_ZN69ImplicitInstantiationExportedExplicitInstantiationDefExportedTemplateIiE1fEv namespace { struct InternalLinkageType {}; } struct __declspec(dllexport) PR23308 { @@ -993,7 +982,6 @@ struct __declspec(dllexport) DerivedFromTemplate : public ClassTemplate {}; USEMEMFUNC(DerivedFromTemplate, func) // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$ClassTemplate@H@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIiE4funcEv -// C32-DAG: define linkonce_odr dso_local void @_ZN13ClassTemplateIiE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN13ClassTemplateIiE4funcEv // ExportedTemplate is explicitly exported. 
@@ -1001,7 +989,6 @@ struct __declspec(dllexport) DerivedFromExportedTemplate : public ExportedClassT USEMEMFUNC(DerivedFromExportedTemplate, func) // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$ExportedClassTemplate@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN21ExportedClassTemplateIiE4funcEv -// C32-DAG: define weak_odr dso_local dllexport void @_ZN21ExportedClassTemplateIiE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN21ExportedClassTemplateIiE4funcEv // ImportedClassTemplate is explicitly imported. @@ -1009,7 +996,6 @@ struct __declspec(dllexport) DerivedFromImportedTemplate : public ImportedClassT USEMEMFUNC(DerivedFromImportedTemplate, func) // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"?func@?$ImportedClassTemplate@H@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN21ImportedClassTemplateIiE4funcEv -// C32-DAG: declare dllimport void @_ZN21ImportedClassTemplateIiE4funcEv // PS-DAG: declare dllimport void @_ZN21ImportedClassTemplateIiE4funcEv // Base class already implicitly instantiated without dll attribute. @@ -1018,7 +1004,6 @@ struct __declspec(dllexport) DerivedFromTemplateD2 : public ClassTemplate USEMEMFUNC(DerivedFromTemplateB2, func) // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"?func@?$ClassTemplate@_N@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIbE4funcEv -// C32-DAG: define linkonce_odr dso_local void @_ZN13ClassTemplateIbE4funcEv // PS-DAG: declare dllimport void @_ZN13ClassTemplateIbE4funcEv // Base class already specialized without dll attribute. 
@@ -1035,7 +1019,6 @@ struct __declspec(dllexport) DerivedFromExplicitlySpecializedTemplate : public E USEMEMFUNC(DerivedFromExplicitlySpecializedTemplate, func) // M32-DAG: define dso_local x86_thiscallcc void @"?func@?$ExplicitlySpecializedTemplate@H@@QAEXXZ" // G32-DAG: define dso_local x86_thiscallcc void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv -// C32-DAG: define dso_local void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv // PS-DAG: define dso_local void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv // Base class alredy specialized with export attribute. @@ -1043,7 +1026,6 @@ struct __declspec(dllexport) DerivedFromExplicitlyExportSpecializedTemplate : pu USEMEMFUNC(DerivedFromExplicitlyExportSpecializedTemplate, func) // M32-DAG: define dso_local dllexport x86_thiscallcc void @"?func@?$ExplicitlyExportSpecializedTemplate@H@@QAEXXZ" // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv -// C32-DAG: define dso_local dllexport void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv // PS-DAG: define dso_local dllexport void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv // Base class already specialized with import attribute. @@ -1051,7 +1033,6 @@ struct __declspec(dllexport) DerivedFromExplicitlyImportSpecializedTemplate : pu USEMEMFUNC(DerivedFromExplicitlyImportSpecializedTemplate, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ExplicitlyImportSpecializedTemplate@H@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN35ExplicitlyImportSpecializedTemplateIiE4funcEv -// C32-DAG: declare dllimport void @_ZN35ExplicitlyImportSpecializedTemplateIiE4funcEv // PS-DAG: declare dllimport void @_ZN35ExplicitlyImportSpecializedTemplateIiE4funcEv // Base class already instantiated without dll attribute. 
@@ -1059,7 +1040,6 @@ struct __declspec(dllexport) DerivedFromExplicitlyInstantiatedTemplate : public USEMEMFUNC(DerivedFromExplicitlyInstantiatedTemplate, func) // M32-DAG: define weak_odr dso_local x86_thiscallcc void @"?func@?$ExplicitlyInstantiatedTemplate@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv -// C32-DAG: define weak_odr dso_local void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv // PS-DAG: define weak_odr void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv // Base class already instantiated with export attribute. @@ -1067,7 +1047,6 @@ struct __declspec(dllexport) DerivedFromExplicitlyExportInstantiatedTemplate : p USEMEMFUNC(DerivedFromExplicitlyExportInstantiatedTemplate, func) // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$ExplicitlyExportInstantiatedTemplate@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv -// C32-DAG: define weak_odr dso_local dllexport void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv // Base class already instantiated with import attribute. @@ -1075,7 +1054,6 @@ struct __declspec(dllexport) DerivedFromExplicitlyImportInstantiatedTemplate : p USEMEMFUNC(DerivedFromExplicitlyImportInstantiatedTemplate, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ExplicitlyImportInstantiatedTemplate@H@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN36ExplicitlyImportInstantiatedTemplateIiE4funcEv -// C32-DAG: declare dllimport void @_ZN36ExplicitlyImportInstantiatedTemplateIiE4funcEv // PS-DAG: declare dllimport void @_ZN36ExplicitlyImportInstantiatedTemplateIiE4funcEv // MS: A dll attribute propagates through multiple levels of instantiation. 
@@ -1085,7 +1063,6 @@ struct __declspec(dllexport) BottomClass : public MiddleClass { }; USEMEMFUNC(BottomClass, func) // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$TopClass@H@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN8TopClassIiE4funcEv -// C32-DAG: define linkonce_odr dso_local void @_ZN8TopClassIiE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN8TopClassIiE4funcEv template struct ExplicitInstantiationDeclTemplateBase { void func() {} }; @@ -1094,7 +1071,6 @@ struct __declspec(dllexport) DerivedFromExplicitInstantiationDeclTemplateBase : template struct ExplicitInstantiationDeclTemplateBase; // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$ExplicitInstantiationDeclTemplateBase@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv -// C32-DAG: define weak_odr dso_local void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv // PR26076 diff --git a/clang/test/CodeGenCXX/dllimport-members.cpp b/clang/test/CodeGenCXX/dllimport-members.cpp index 896f1547b658f..19bd7fec3f337 100644 --- a/clang/test/CodeGenCXX/dllimport-members.cpp +++ b/clang/test/CodeGenCXX/dllimport-members.cpp @@ -2,11 +2,8 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -disable-llvm-passes -triple x86_64-windows-msvc -fms-compatibility -emit-llvm -std=c++1y -O0 -o - %s -DMSABI | FileCheck --check-prefix=MSC --check-prefix=M64 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -disable-llvm-passes -triple i686-windows-gnu -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=G32 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -disable-llvm-passes -triple x86_64-windows-gnu -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=G64 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis 
-disable-llvm-passes -triple i686-pc-cygwin -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=C32 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -disable-llvm-passes -triple x86_64-pc-cygwin -emit-llvm -std=c++1y -O0 -o - %s | FileCheck --check-prefix=GNU --check-prefix=G64 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -disable-llvm-passes -triple i686-windows-msvc -fms-compatibility -emit-llvm -std=c++1y -O1 -o - %s -DMSABI | FileCheck --check-prefix=MO1 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -disable-llvm-passes -triple i686-windows-gnu -emit-llvm -std=c++1y -O1 -o - %s | FileCheck --check-prefix=GO1 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -disable-llvm-passes -triple i686-pc-cygwin -emit-llvm -std=c++1y -O1 -o - %s | FileCheck --check-prefix=CO1 %s // Helper structs to make templates more expressive. struct ImplicitInst_Imported {}; @@ -77,29 +74,21 @@ struct ImportMembers { // M32-DAG: declare dllimport x86_thiscallcc void @"?normalInlineDecl@ImportMembers@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?normalInlineDecl@ImportMembers@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define dso_local x86_thiscallcc void @_ZN13ImportMembers9normalDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local void @_ZN13ImportMembers9normalDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local void @_ZN13ImportMembers9normalDefEv(ptr {{[^,]*}} %this) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers10normalDeclEv(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN13ImportMembers10normalDeclEv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN13ImportMembers10normalDeclEv(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers13normalInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers13normalInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void 
@_ZN13ImportMembers13normalInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers15normalInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers15normalInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers15normalInlineDefEv(ptr {{[^,]*}} %this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16normalInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16normalInlineDeclEv(ptr {{[^,]*}} %this) - // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16normalInlineDeclEv(ptr {{[^,]*}} %this) + // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16normalInlineDeclEv(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?normalInclass@ImportMembers@@QAEXXZ"( // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?normalInlineDef@ImportMembers@@QAEXXZ"( // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?normalInlineDecl@ImportMembers@@QAEXXZ"( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers13normalInclassEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers15normalInlineDefEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16normalInlineDeclEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers13normalInclassEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers15normalInlineDefEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16normalInlineDeclEv( __declspec(dllimport) void normalDef(); // dllimport ignored __declspec(dllimport) void normalDecl(); __declspec(dllimport) void normalInclass() {} @@ -117,19 +106,14 @@ struct ImportMembers { // M32-DAG: declare dllimport x86_thiscallcc 
void @"?virtualInlineDecl@ImportMembers@@UAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?virtualInlineDecl@ImportMembers@@UEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define dso_local x86_thiscallcc void @_ZN13ImportMembers10virtualDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local void @_ZN13ImportMembers10virtualDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local void @_ZN13ImportMembers10virtualDefEv(ptr {{[^,]*}} %this) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers11virtualDeclEv(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN13ImportMembers11virtualDeclEv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN13ImportMembers11virtualDeclEv(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers14virtualInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers14virtualInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers14virtualInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16virtualInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16virtualInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16virtualInlineDefEv(ptr {{[^,]*}} %this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers17virtualInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers17virtualInlineDeclEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers17virtualInlineDeclEv(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?virtualInclass@ImportMembers@@UAEXXZ"( // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?virtualInlineDef@ImportMembers@@UAEXXZ"( @@ -137,9 +121,6 @@ struct 
ImportMembers { // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers14virtualInclassEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers16virtualInlineDefEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers17virtualInlineDeclEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers14virtualInclassEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16virtualInlineDefEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers17virtualInlineDeclEv( __declspec(dllimport) virtual void virtualDef(); // dllimport ignored __declspec(dllimport) virtual void virtualDecl(); __declspec(dllimport) virtual void virtualInclass() {} @@ -162,9 +143,6 @@ struct ImportMembers { // GO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers13staticInclassEv() // GO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers15staticInlineDefEv() // GO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16staticInlineDeclEv() - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers13staticInclassEv() - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers15staticInlineDefEv() - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers16staticInlineDeclEv() __declspec(dllimport) static void staticDef(); // dllimport ignored __declspec(dllimport) static void staticDecl(); __declspec(dllimport) static void staticInclass() {} @@ -174,7 +152,6 @@ struct ImportMembers { // M32-DAG: declare dllimport x86_thiscallcc void @"?protectedNormalDecl@ImportMembers@@IAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?protectedNormalDecl@ImportMembers@@IEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers19protectedNormalDeclEv(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN13ImportMembers19protectedNormalDeclEv(ptr {{[^,]*}}) // G64-DAG: declare dllimport 
void @_ZN13ImportMembers19protectedNormalDeclEv(ptr {{[^,]*}}) // MSC-DAG: declare dllimport void @"?protectedStaticDecl@ImportMembers@@KAXXZ"() // GNU-DAG: declare dllimport void @_ZN13ImportMembers19protectedStaticDeclEv() @@ -185,7 +162,6 @@ struct ImportMembers { // M32-DAG: declare dllimport x86_thiscallcc void @"?privateNormalDecl@ImportMembers@@AAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?privateNormalDecl@ImportMembers@@AEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers17privateNormalDeclEv(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN13ImportMembers17privateNormalDeclEv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN13ImportMembers17privateNormalDeclEv(ptr {{[^,]*}}) // MSC-DAG: declare dllimport void @"?privateStaticDecl@ImportMembers@@CAXXZ"() // GNU-DAG: declare dllimport void @_ZN13ImportMembers17privateStaticDeclEv() @@ -196,7 +172,6 @@ struct ImportMembers { // M32-DAG: declare dso_local x86_thiscallcc void @"?ignored@ImportMembers@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dso_local void @"?ignored@ImportMembers@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dso_local x86_thiscallcc void @_ZN13ImportMembers7ignoredEv(ptr {{[^,]*}}) - // C32-DAG: declare dso_local void @_ZN13ImportMembers7ignoredEv(ptr {{[^,]*}}) // G64-DAG: declare dso_local void @_ZN13ImportMembers7ignoredEv(ptr {{[^,]*}}) public: void ignored(); @@ -271,19 +246,14 @@ struct ImportMembers::Nested { // M32-DAG: declare dllimport x86_thiscallcc void @"?normalInlineDecl@Nested@ImportMembers@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?normalInlineDecl@Nested@ImportMembers@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested9normalDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local void @_ZN13ImportMembers6Nested9normalDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local void @_ZN13ImportMembers6Nested9normalDefEv(ptr {{[^,]*}} %this) // 
G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers6Nested10normalDeclEv(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN13ImportMembers6Nested10normalDeclEv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN13ImportMembers6Nested10normalDeclEv(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested13normalInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested13normalInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested13normalInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested15normalInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested15normalInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested15normalInlineDefEv(ptr {{[^,]*}} %this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested16normalInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16normalInlineDeclEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16normalInlineDeclEv(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?normalInclass@Nested@ImportMembers@@QAEXXZ"( // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?normalInlineDef@Nested@ImportMembers@@QAEXXZ"( @@ -291,9 +261,6 @@ struct ImportMembers::Nested { // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested13normalInclassEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested15normalInlineDefEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested16normalInlineDeclEv( - // CO1-DAG: define 
linkonce_odr dso_local void @_ZN13ImportMembers6Nested13normalInclassEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested15normalInlineDefEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16normalInlineDeclEv( __declspec(dllimport) void normalDef(); // dllimport ignored __declspec(dllimport) void normalDecl(); __declspec(dllimport) void normalInclass() {} @@ -311,19 +278,14 @@ struct ImportMembers::Nested { // M32-DAG: declare dllimport x86_thiscallcc void @"?virtualInlineDecl@Nested@ImportMembers@@UAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?virtualInlineDecl@Nested@ImportMembers@@UEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested10virtualDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define dso_local void @_ZN13ImportMembers6Nested10virtualDefEv(ptr {{[^,]*}} %this) // G64-DAG: define dso_local void @_ZN13ImportMembers6Nested10virtualDefEv(ptr {{[^,]*}} %this) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers6Nested11virtualDeclEv(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN13ImportMembers6Nested11virtualDeclEv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN13ImportMembers6Nested11virtualDeclEv(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested14virtualInclassEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested14virtualInclassEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested14virtualInclassEv(ptr {{[^,]*}} %this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested16virtualInlineDefEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16virtualInlineDefEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16virtualInlineDefEv(ptr {{[^,]*}} 
%this) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested17virtualInlineDeclEv(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"?virtualInclass@Nested@ImportMembers@@UAEXXZ"( @@ -332,9 +294,6 @@ struct ImportMembers::Nested { // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested14virtualInclassEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested16virtualInlineDefEv( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested17virtualInlineDeclEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested14virtualInclassEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16virtualInlineDefEv( - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested17virtualInlineDeclEv( __declspec(dllimport) virtual void virtualDef(); // dllimport ignored __declspec(dllimport) virtual void virtualDecl(); __declspec(dllimport) virtual void virtualInclass() {} @@ -357,9 +316,6 @@ struct ImportMembers::Nested { // GO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested13staticInclassEv() // GO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested15staticInlineDefEv() // GO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16staticInlineDeclEv() - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested13staticInclassEv() - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested15staticInlineDefEv() - // CO1-DAG: define linkonce_odr dso_local void @_ZN13ImportMembers6Nested16staticInlineDeclEv() __declspec(dllimport) static void 
staticDef(); // dllimport ignored __declspec(dllimport) static void staticDecl(); __declspec(dllimport) static void staticInclass() {} @@ -369,7 +325,6 @@ struct ImportMembers::Nested { // M32-DAG: declare dllimport x86_thiscallcc void @"?protectedNormalDecl@Nested@ImportMembers@@IAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?protectedNormalDecl@Nested@ImportMembers@@IEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers6Nested19protectedNormalDeclEv(ptr {{[^,]*}} - // C32-DAG: declare dllimport void @_ZN13ImportMembers6Nested19protectedNormalDeclEv(ptr {{[^,]*}} // G64-DAG: declare dllimport void @_ZN13ImportMembers6Nested19protectedNormalDeclEv(ptr {{[^,]*}}) // MSC-DAG: declare dllimport void @"?protectedStaticDecl@Nested@ImportMembers@@KAXXZ"() // GNU-DAG: declare dllimport void @_ZN13ImportMembers6Nested19protectedStaticDeclEv() @@ -380,7 +335,6 @@ struct ImportMembers::Nested { // M32-DAG: declare dllimport x86_thiscallcc void @"?privateNormalDecl@Nested@ImportMembers@@AAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"?privateNormalDecl@Nested@ImportMembers@@AEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN13ImportMembers6Nested17privateNormalDeclEv(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN13ImportMembers6Nested17privateNormalDeclEv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN13ImportMembers6Nested17privateNormalDeclEv(ptr {{[^,]*}}) // MSC-DAG: declare dllimport void @"?privateStaticDecl@Nested@ImportMembers@@CAXXZ"() // GNU-DAG: declare dllimport void @_ZN13ImportMembers6Nested17privateStaticDeclEv() @@ -391,7 +345,6 @@ struct ImportMembers::Nested { // M32-DAG: declare dso_local x86_thiscallcc void @"?ignored@Nested@ImportMembers@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dso_local void @"?ignored@Nested@ImportMembers@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dso_local x86_thiscallcc void @_ZN13ImportMembers6Nested7ignoredEv(ptr 
{{[^,]*}}) - // C32-DAG: declare dso_local void @_ZN13ImportMembers6Nested7ignoredEv(ptr {{[^,]*}}) // G64-DAG: declare dso_local void @_ZN13ImportMembers6Nested7ignoredEv(ptr {{[^,]*}}) public: void ignored(); @@ -458,42 +411,36 @@ struct ImportSpecials { // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportSpecials@@QAE@XZ"(ptr {{[^,]*}} returned {{[^,]*}}) // M64-DAG: declare dllimport ptr @"??0ImportSpecials@@QEAA@XZ"(ptr {{[^,]*}} returned {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN14ImportSpecialsC1Ev(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN14ImportSpecialsC1Ev(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN14ImportSpecialsC1Ev(ptr {{[^,]*}}) __declspec(dllimport) ImportSpecials(); // M32-DAG: declare dllimport x86_thiscallcc void @"??1ImportSpecials@@QAE@XZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??1ImportSpecials@@QEAA@XZ"(ptr {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN14ImportSpecialsD1Ev(ptr {{[^,]*}}) - // C32-DAG: declare dllimport void @_ZN14ImportSpecialsD1Ev(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN14ImportSpecialsD1Ev(ptr {{[^,]*}}) __declspec(dllimport) ~ImportSpecials(); // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportSpecials@@QAE@ABU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport ptr @"??0ImportSpecials@@QEAA@AEBU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN14ImportSpecialsC1ERKS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) - // C32-DAG: declare dllimport void @_ZN14ImportSpecialsC1ERKS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G64-DAG: declare dllimport void @_ZN14ImportSpecialsC1ERKS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) __declspec(dllimport) 
ImportSpecials(const ImportSpecials&); // M32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportSpecials@@QAEAAU0@ABU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportSpecials@@QEAAAEAU0@AEBU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ImportSpecialsaSERKS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) - // C32-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ImportSpecialsaSERKS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ImportSpecialsaSERKS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) __declspec(dllimport) ImportSpecials& operator=(const ImportSpecials&); // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportSpecials@@QAE@$$QAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport ptr @"??0ImportSpecials@@QEAA@$$QEAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN14ImportSpecialsC1EOS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) - // C32-DAG: declare dllimport void @_ZN14ImportSpecialsC1EOS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G64-DAG: declare dllimport void @_ZN14ImportSpecialsC1EOS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) __declspec(dllimport) ImportSpecials(ImportSpecials&&); // M32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} 
dereferenceable({{[0-9]+}}) ptr @"??4ImportSpecials@@QAEAAU0@$$QAU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportSpecials@@QEAAAEAU0@$$QEAU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ImportSpecialsaSEOS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) - // C32-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ImportSpecialsaSEOS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN14ImportSpecialsaSEOS_(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) __declspec(dllimport) ImportSpecials& operator=(ImportSpecials&&); }; @@ -505,61 +452,49 @@ struct ImportInlineSpecials { // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportInlineSpecials@@QAE@XZ"(ptr {{[^,]*}} returned {{[^,]*}}) // M64-DAG: declare dllimport ptr @"??0ImportInlineSpecials@@QEAA@XZ"(ptr {{[^,]*}} returned {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1Ev(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1Ev(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1Ev(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc ptr @"??0ImportInlineSpecials@@QAE@XZ"( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1Ev( - // CO1-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1Ev( __declspec(dllimport) ImportInlineSpecials() {} // M32-DAG: declare dllimport x86_thiscallcc void 
@"??1ImportInlineSpecials@@QAE@XZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??1ImportInlineSpecials@@QEAA@XZ"(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsD1Ev(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsD1Ev(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsD1Ev(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"??1ImportInlineSpecials@@QAE@XZ"( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsD1Ev( - // CO1-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsD1Ev( __declspec(dllimport) ~ImportInlineSpecials() {} // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportInlineSpecials@@QAE@ABU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport ptr @"??0ImportInlineSpecials@@QEAA@AEBU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: define available_externally dllimport x86_thiscallcc ptr @"??0ImportInlineSpecials@@QAE@ABU0@@Z"( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1ERKS_( - // CO1-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1ERKS_( __declspec(dllimport) inline ImportInlineSpecials(const 
ImportInlineSpecials&); // M32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportInlineSpecials@@QAEAAU0@ABU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportInlineSpecials@@QEAAAEAU0@AEBU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: define available_externally dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportInlineSpecials@@QAEAAU0@ABU0@@Z"( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSERKS_( - // CO1-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSERKS_( __declspec(dllimport) ImportInlineSpecials& operator=(const ImportInlineSpecials&); // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportInlineSpecials@@QAE@$$QAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport ptr @"??0ImportInlineSpecials@@QEAA@$$QEAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} 
dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: define available_externally dllimport x86_thiscallcc ptr @"??0ImportInlineSpecials@@QAE@$$QAU0@@Z"( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN20ImportInlineSpecialsC1EOS_( - // CO1-DAG: define linkonce_odr dso_local void @_ZN20ImportInlineSpecialsC1EOS_( __declspec(dllimport) ImportInlineSpecials(ImportInlineSpecials&&) {} // M32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportInlineSpecials@@QAEAAU0@$$QAU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportInlineSpecials@@QEAAAEAU0@$$QEAU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: 
define available_externally dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportInlineSpecials@@QAEAAU0@$$QAU0@@Z"( // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSEOS_( - // CO1-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN20ImportInlineSpecialsaSEOS_( __declspec(dllimport) ImportInlineSpecials& operator=(ImportInlineSpecials&&) { return *this; } }; ImportInlineSpecials::ImportInlineSpecials(const ImportInlineSpecials&) {} @@ -572,61 +507,49 @@ struct ImportDefaulted { // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportDefaulted@@QAE@XZ"(ptr {{[^,]*}} returned {{[^,]*}}) // M64-DAG: declare dllimport ptr @"??0ImportDefaulted@@QEAA@XZ"(ptr {{[^,]*}} returned {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1Ev(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1Ev(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1Ev(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc ptr @"??0ImportDefaulted@@QAE@XZ"(ptr {{[^,]*}} returned {{[^,]*}} %this) // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1Ev(ptr {{[^,]*}} %this) - // CO1-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1Ev(ptr {{[^,]*}} %this) __declspec(dllimport) ImportDefaulted() = default; // M32-DAG: declare dllimport x86_thiscallcc void @"??1ImportDefaulted@@QAE@XZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??1ImportDefaulted@@QEAA@XZ"(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedD1Ev(ptr {{[^,]*}} %this) - // C32-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedD1Ev(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void 
@_ZN15ImportDefaultedD1Ev(ptr {{[^,]*}} %this) // MO1-DAG: define available_externally dllimport x86_thiscallcc void @"??1ImportDefaulted@@QAE@XZ"(ptr {{[^,]*}} %this) // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedD1Ev(ptr {{[^,]*}} %this) - // CO1-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedD1Ev(ptr {{[^,]*}} %this) __declspec(dllimport) ~ImportDefaulted() = default; // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportDefaulted@@QAE@ABU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport ptr @"??0ImportDefaulted@@QEAA@AEBU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: define available_externally dllimport x86_thiscallcc ptr @"??0ImportDefaulted@@QAE@ABU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // CO1-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllimport) ImportDefaulted(const ImportDefaulted&) = default; // M32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr 
@"??4ImportDefaulted@@QAEAAU0@ABU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaulted@@QEAAAEAU0@AEBU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: define available_externally dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaulted@@QAEAAU0@ABU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // CO1-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllimport) ImportDefaulted& operator=(const ImportDefaulted&) = default; // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportDefaulted@@QAE@$$QAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport ptr @"??0ImportDefaulted@@QEAA@$$QEAU0@@Z"(ptr {{[^,]*}} returned 
{{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: define available_externally dllimport x86_thiscallcc ptr @"??0ImportDefaulted@@QAE@$$QAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN15ImportDefaultedC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // CO1-DAG: define linkonce_odr dso_local void @_ZN15ImportDefaultedC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllimport) ImportDefaulted(ImportDefaulted&&) = default; // M32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaulted@@QAEAAU0@$$QAU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaulted@@QEAAAEAU0@$$QEAU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // C32-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} 
dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // MO1-DAG: define available_externally dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaulted@@QAEAAU0@$$QAU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // GO1-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) - // CO1-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN15ImportDefaultedaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) __declspec(dllimport) ImportDefaulted& operator=(ImportDefaulted&&) = default; ForceNonTrivial v; // ensure special members are non-trivial @@ -663,31 +586,26 @@ __declspec(dllimport) ImportDefaultedDefs::~ImportDefaultedDefs() = default; // M32-DAG: declare dllimport x86_thiscallcc ptr @"??0ImportDefaultedDefs@@QAE@ABU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport ptr @"??0ImportDefaultedDefs@@QEAA@AEBU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN19ImportDefaultedDefsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define linkonce_odr dso_local void @_ZN19ImportDefaultedDefsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local void @_ZN19ImportDefaultedDefsC1ERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} 
dereferenceable({{[0-9]+}}) %0) inline ImportDefaultedDefs::ImportDefaultedDefs(const ImportDefaultedDefs&) = default; // M32-DAG: declare dllimport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaultedDefs@@QAEAAU0@ABU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // M64-DAG: declare dllimport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaultedDefs@@QEAAAEAU0@AEBU0@@Z"(ptr {{[^,]*}}, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}})) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ImportDefaultedDefsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ImportDefaultedDefsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define linkonce_odr dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ImportDefaultedDefsaSERKS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) inline ImportDefaultedDefs& ImportDefaultedDefs::operator=(const ImportDefaultedDefs&) = default; // M32-DAG: define dso_local dllexport x86_thiscallcc ptr @"??0ImportDefaultedDefs@@QAE@$$QAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport ptr @"??0ImportDefaultedDefs@@QEAA@$$QEAU0@@Z"(ptr {{[^,]*}} returned {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local x86_thiscallcc void @_ZN19ImportDefaultedDefsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define dso_local void @_ZN19ImportDefaultedDefsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) 
// G64-DAG: define dso_local void @_ZN19ImportDefaultedDefsC1EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local x86_thiscallcc void @_ZN19ImportDefaultedDefsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define dso_local void @_ZN19ImportDefaultedDefsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local void @_ZN19ImportDefaultedDefsC2EOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) ImportDefaultedDefs::ImportDefaultedDefs(ImportDefaultedDefs&&) = default; // dllimport ignored // M32-DAG: define dso_local dllexport x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaultedDefs@@QAEAAU0@$$QAU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // M64-DAG: define dso_local dllexport nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @"??4ImportDefaultedDefs@@QEAAAEAU0@$$QEAU0@@Z"(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G32-DAG: define dso_local x86_thiscallcc nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ImportDefaultedDefsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) -// C32-DAG: define dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ImportDefaultedDefsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) // G64-DAG: define dso_local nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr @_ZN19ImportDefaultedDefsaSEOS_(ptr {{[^,]*}} %this, ptr nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %0) ImportDefaultedDefs& ImportDefaultedDefs::operator=(ImportDefaultedDefs&&) = default; // dllimport ignored @@ -705,28 +623,24 @@ struct ImportAlloc { // M32-DAG: declare dllimport ptr 
@"??2ImportAlloc@@SAPAXI@Z"(i32) // M64-DAG: declare dllimport ptr @"??2ImportAlloc@@SAPEAX_K@Z"(i64) // G32-DAG: declare dllimport ptr @_ZN11ImportAllocnwEj(i32) -// C32-DAG: declare dllimport ptr @_ZN11ImportAllocnwEj(i32) -// G64-DAG: declare dllimport ptr @_ZN11ImportAllocnwE{{[ym]}}(i64) +// G64-DAG: declare dllimport ptr @_ZN11ImportAllocnwEy(i64) void UNIQ(use)() { new ImportAlloc(); } // M32-DAG: declare dllimport ptr @"??_UImportAlloc@@SAPAXI@Z"(i32) // M64-DAG: declare dllimport ptr @"??_UImportAlloc@@SAPEAX_K@Z"(i64) // G32-DAG: declare dllimport ptr @_ZN11ImportAllocnaEj(i32) -// C32-DAG: declare dllimport ptr @_ZN11ImportAllocnaEj(i32) -// G64-DAG: declare dllimport ptr @_ZN11ImportAllocnaE{{[ym]}}(i64) +// G64-DAG: declare dllimport ptr @_ZN11ImportAllocnaEy(i64) void UNIQ(use)() { new ImportAlloc[1]; } // M32-DAG: declare dllimport void @"??3ImportAlloc@@SAXPAX@Z"(ptr) // M64-DAG: declare dllimport void @"??3ImportAlloc@@SAXPEAX@Z"(ptr) // G32-DAG: declare dllimport void @_ZN11ImportAllocdlEPv(ptr) -// C32-DAG: declare dllimport void @_ZN11ImportAllocdlEPv(ptr) // G64-DAG: declare dllimport void @_ZN11ImportAllocdlEPv(ptr) void UNIQ(use)(ImportAlloc* ptr) { delete ptr; } // M32-DAG: declare dllimport void @"??_VImportAlloc@@SAXPAX@Z"(ptr) // M64-DAG: declare dllimport void @"??_VImportAlloc@@SAXPEAX@Z"(ptr) // G32-DAG: declare dllimport void @_ZN11ImportAllocdaEPv(ptr) -// C32-DAG: declare dllimport void @_ZN11ImportAllocdaEPv(ptr) // G64-DAG: declare dllimport void @_ZN11ImportAllocdaEPv(ptr) void UNIQ(use)(ImportAlloc* ptr) { delete[] ptr; } @@ -746,7 +660,6 @@ struct MemFunTmpl { // M32-DAG: declare dllimport x86_thiscallcc void @"??$importedNormal@UImplicitInst_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$importedNormal@UImplicitInst_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI21ImplicitInst_ImportedEEvv(ptr 
{{[^,]*}} %this) -// C32-DAG: define linkonce_odr dso_local void @_ZN10MemFunTmpl14importedNormalI21ImplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN10MemFunTmpl14importedNormalI21ImplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) USEMF(MemFunTmpl, importedNormal) @@ -760,7 +673,6 @@ USE(MemFunTmpl::importedStatic) // M32-DAG: declare dllimport x86_thiscallcc void @"??$importedNormal@UExplicitDecl_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$importedNormal@UExplicitDecl_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI21ExplicitDecl_ImportedEEvv(ptr {{[^,]*}}) -// C32-DAG: declare dso_local void @_ZN10MemFunTmpl14importedNormalI21ExplicitDecl_ImportedEEvv(ptr {{[^,]*}}) // G64-DAG: declare dso_local void @_ZN10MemFunTmpl14importedNormalI21ExplicitDecl_ImportedEEvv(ptr {{[^,]*}}) extern template void MemFunTmpl::importedNormal(); USEMF(MemFunTmpl, importedNormal) @@ -776,7 +688,6 @@ USE(MemFunTmpl::importedStatic) // M32-DAG: declare dllimport x86_thiscallcc void @"??$importedNormal@UExplicitInst_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$importedNormal@UExplicitInst_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI21ExplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define weak_odr dso_local void @_ZN10MemFunTmpl14importedNormalI21ExplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local void @_ZN10MemFunTmpl14importedNormalI21ExplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) template void MemFunTmpl::importedNormal(); USEMF(MemFunTmpl, importedNormal) @@ -791,7 +702,6 @@ USE(MemFunTmpl::importedStatic) // M32-DAG: declare dllimport x86_thiscallcc void @"??$importedNormal@UExplicitSpec_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // 
M64-DAG: declare dllimport void @"??$importedNormal@UExplicitSpec_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI21ExplicitSpec_ImportedEEvv(ptr {{[^,]*}}) -// C32-DAG: declare dllimport void @_ZN10MemFunTmpl14importedNormalI21ExplicitSpec_ImportedEEvv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN10MemFunTmpl14importedNormalI21ExplicitSpec_ImportedEEvv(ptr {{[^,]*}}) template<> __declspec(dllimport) void MemFunTmpl::importedNormal(); USEMF(MemFunTmpl, importedNormal) @@ -806,7 +716,6 @@ USEMF(MemFunTmpl, importedNormal) // M32-DAG: declare dllimport x86_thiscallcc void @"??$importedNormal@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$importedNormal@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI31ExplicitSpec_InlineDef_ImportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define linkonce_odr dso_local void @_ZN10MemFunTmpl14importedNormalI31ExplicitSpec_InlineDef_ImportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN10MemFunTmpl14importedNormalI31ExplicitSpec_InlineDef_ImportedEEvv(ptr {{[^,]*}} %this) template<> __declspec(dllimport) inline void MemFunTmpl::importedNormal() {} USEMF(MemFunTmpl, importedNormal) @@ -834,7 +743,6 @@ USE(MemFunTmpl::importedStatic) // M32-DAG: define dso_local x86_thiscallcc void @"??$importedNormal@UExplicitSpec_NotImported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}} %this) // M64-DAG: define dso_local void @"??$importedNormal@UExplicitSpec_NotImported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}} %this) // G32-DAG: define dso_local x86_thiscallcc void @_ZN10MemFunTmpl14importedNormalI24ExplicitSpec_NotImportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define dso_local void @_ZN10MemFunTmpl14importedNormalI24ExplicitSpec_NotImportedEEvv(ptr {{[^,]*}} %this) // 
G64-DAG: define dso_local void @_ZN10MemFunTmpl14importedNormalI24ExplicitSpec_NotImportedEEvv(ptr {{[^,]*}} %this) template<> void MemFunTmpl::importedNormal() {} USEMF(MemFunTmpl, importedNormal) @@ -850,7 +758,6 @@ USE(MemFunTmpl::importedStatic) // M32-DAG: declare dllimport x86_thiscallcc void @"??$normalDef@UExplicitDecl_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$normalDef@UExplicitDecl_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dso_local x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ImportedEEvv(ptr {{[^,]*}}) -// C32-DAG: declare dso_local void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ImportedEEvv(ptr {{[^,]*}}) // G64-DAG: declare dso_local void @_ZN10MemFunTmpl9normalDefI21ExplicitDecl_ImportedEEvv(ptr {{[^,]*}}) extern template __declspec(dllimport) void MemFunTmpl::normalDef(); USEMF(MemFunTmpl, normalDef) @@ -866,7 +773,6 @@ USE(MemFunTmpl::staticDef) // M32-DAG: declare dllimport x86_thiscallcc void @"??$normalDef@UExplicitInst_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$normalDef@UExplicitInst_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define weak_odr dso_local void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define weak_odr dso_local void @_ZN10MemFunTmpl9normalDefI21ExplicitInst_ImportedEEvv(ptr {{[^,]*}} %this) template __declspec(dllimport) void MemFunTmpl::normalDef(); USEMF(MemFunTmpl, normalDef) @@ -881,7 +787,6 @@ USE(MemFunTmpl::staticDef) // M32-DAG: declare dllimport x86_thiscallcc void @"??$normalDef@UExplicitSpec_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$normalDef@UExplicitSpec_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: declare dllimport x86_thiscallcc void 
@_ZN10MemFunTmpl9normalDefI21ExplicitSpec_ImportedEEvv(ptr {{[^,]*}}) -// C32-DAG: declare dllimport void @_ZN10MemFunTmpl9normalDefI21ExplicitSpec_ImportedEEvv(ptr {{[^,]*}}) // G64-DAG: declare dllimport void @_ZN10MemFunTmpl9normalDefI21ExplicitSpec_ImportedEEvv(ptr {{[^,]*}}) template<> __declspec(dllimport) void MemFunTmpl::normalDef(); USEMF(MemFunTmpl, normalDef) @@ -896,7 +801,6 @@ USEMF(MemFunTmpl, normalDef) // M32-DAG: declare dllimport x86_thiscallcc void @"??$normalDef@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QAEXXZ"(ptr {{[^,]*}}) // M64-DAG: declare dllimport void @"??$normalDef@UExplicitSpec_InlineDef_Imported@@@MemFunTmpl@@QEAAXXZ"(ptr {{[^,]*}}) // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ImportedEEvv(ptr {{[^,]*}} %this) -// C32-DAG: define linkonce_odr dso_local void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ImportedEEvv(ptr {{[^,]*}} %this) // G64-DAG: define linkonce_odr dso_local void @_ZN10MemFunTmpl9normalDefI31ExplicitSpec_InlineDef_ImportedEEvv(ptr {{[^,]*}} %this) template<> __declspec(dllimport) inline void MemFunTmpl::normalDef() {} USEMF(MemFunTmpl, normalDef) @@ -984,12 +888,10 @@ template struct ClassTmplMem { // MSVC imports explicit specialization of imported class template member function; MinGW does not. 
// M32-DAG: declare dllimport x86_thiscallcc void @"?importedNormal@?$ClassTmplMem@H@@QAEXXZ" // G32-DAG: declare dso_local x86_thiscallcc void @_ZN12ClassTmplMemIiE14importedNormalEv -// C32-DAG: declare dso_local void @_ZN12ClassTmplMemIiE14importedNormalEv template<> void ClassTmplMem::importedNormal(); USEMF(ClassTmplMem, importedNormal); // M32-DAG: declare dllimport void @"?importedStatic@?$ClassTmplMem@H@@SAXXZ" // G32-DAG: declare dso_local void @_ZN12ClassTmplMemIiE14importedStaticEv -// C32-DAG: declare dso_local void @_ZN12ClassTmplMemIiE14importedStaticEv template<> void ClassTmplMem::importedStatic(); USEMF(ClassTmplMem, importedStatic); diff --git a/clang/test/CodeGenCXX/dllimport-missing-key.cpp b/clang/test/CodeGenCXX/dllimport-missing-key.cpp index 9eb9e7b5bbb0d..d8ef7aa7ea680 100644 --- a/clang/test/CodeGenCXX/dllimport-missing-key.cpp +++ b/clang/test/CodeGenCXX/dllimport-missing-key.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU %s class __declspec(dllimport) QObjectData { public: diff --git a/clang/test/CodeGenCXX/dllimport-rtti.cpp b/clang/test/CodeGenCXX/dllimport-rtti.cpp index 046bf5020398d..f23a242c4bedc 100644 --- a/clang/test/CodeGenCXX/dllimport-rtti.cpp +++ b/clang/test/CodeGenCXX/dllimport-rtti.cpp @@ -1,8 +1,5 @@ -// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++1y -fms-extensions -O1 -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=MSVC -// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -std=c++1y -fms-extensions -O1 -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=GNU -// RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm -std=c++1y -fms-extensions -O1 -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=GNU -// RUN: %clang_cc1 -triple i686-pc-cygwin -emit-llvm -std=c++1y -fms-extensions 
-O1 -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=GNU -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -emit-llvm -std=c++1y -fms-extensions -O1 -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=GNU +// RUN: %clang_cc1 -triple i686-windows-msvc -emit-llvm -std=c++1y -fms-extensions -O1 -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=MSVC +// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -std=c++1y -fms-extensions -O1 -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=GNU struct __declspec(dllimport) S { virtual void f() {} diff --git a/clang/test/CodeGenCXX/dllimport.cpp b/clang/test/CodeGenCXX/dllimport.cpp index 363f97a8d58ee..484866b45389f 100644 --- a/clang/test/CodeGenCXX/dllimport.cpp +++ b/clang/test/CodeGenCXX/dllimport.cpp @@ -2,18 +2,15 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-windows-msvc -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -DMSABI -w | FileCheck --check-prefix=MSC --check-prefix=M64 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-gnu -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU --check-prefix=G32 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-windows-gnu -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-pc-cygwin -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU --check-prefix=C32 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-pc-cygwin -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-msvc -fno-rtti -fno-threadsafe-statics -fms-extensions -fms-compatibility-version=18.00 
-emit-llvm -std=c++1y -O1 -disable-llvm-passes -o - %s -DMSABI -w | FileCheck --check-prefix=MO1 --check-prefix=M18 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-msvc -fno-rtti -fno-threadsafe-statics -fms-extensions -fms-compatibility-version=19.00 -emit-llvm -std=c++1y -O1 -disable-llvm-passes -o - %s -DMSABI -w | FileCheck --check-prefix=MO1 --check-prefix=M19 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-gnu -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O1 -disable-llvm-passes -o - %s -w | FileCheck --check-prefix=GO1 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-scei-ps4 -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=PS %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-sie-ps5 -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=PS %s +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-scei-ps4 -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=PS %s +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-sie-ps5 -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=PS %s // CHECK-NOT doesn't play nice with CHECK-DAG, so use separate run lines. 
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-msvc -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -DMSABI -w | FileCheck --check-prefix=MSC2 %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-windows-gnu -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU2 %s -// RUN: %clang_cc1 -no-enable-noundef-analysis -triple i686-pc-cygwin -fno-rtti -fno-threadsafe-statics -fms-extensions -emit-llvm -std=c++1y -O0 -o - %s -w | FileCheck --check-prefix=GNU2 %s // Helper structs to make templates more expressive. struct ImplicitInst_Imported {}; @@ -208,7 +205,7 @@ USEVAR(VarTmpl) // Functions //===----------------------------------------------------------------------===// -// GNU-DAG: declare dso_local void @_ZdlPv{{[jym]}}(ptr, i{{32|64}}) +// GNU-DAG: declare dso_local void @_ZdlPv{{j|y}}(ptr, i{{32|64}}) // Import function declaration. // MSC-DAG: declare dllimport void @"?decl@@YAXXZ"() @@ -314,7 +311,7 @@ USE(friend5) // Implicit declarations can be redeclared with dllimport. 
// MSC-DAG: declare dllimport nonnull ptr @"??2@{{YAPAXI|YAPEAX_K}}@Z"( -// GNU-DAG: declare dllimport nonnull ptr @_Znw{{[yjm]}}( +// GNU-DAG: declare dllimport nonnull ptr @_Znw{{[yj]}}( __declspec(dllimport) void* operator new(__SIZE_TYPE__ n); void UNIQ(use)() { ::operator new(42); } @@ -680,7 +677,6 @@ struct __declspec(dllimport) KeyFuncClass { }; extern constexpr KeyFuncClass keyFuncClassVar = {}; // G32-DAG: @_ZTV12KeyFuncClass = external dllimport unnamed_addr constant { [3 x ptr] } -// C32-DAG: @_ZTV12KeyFuncClass = external dllimport unnamed_addr constant { [3 x ptr] } struct __declspec(dllimport) X : public virtual W {}; USECLASS(X) @@ -817,7 +813,6 @@ template struct __declspec(dllimport) PartiallySpecializedClassTemp USEMEMFUNC(PartiallySpecializedClassTemplate, f); // M32-DAG: declare dso_local x86_thiscallcc void @"?f@?$PartiallySpecializedClassTemplate@PAX@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN33PartiallySpecializedClassTemplateIPvE1fEv -// C32-DAG: declare dllimport void @_ZN33PartiallySpecializedClassTemplateIPvE1fEv // Attributes on explicit specializations are honored. template struct ExplicitlySpecializedClassTemplate {}; @@ -825,7 +820,6 @@ template <> struct __declspec(dllimport) ExplicitlySpecializedClassTemplate, f); // M32-DAG: declare dllimport x86_thiscallcc void @"?f@?$ExplicitlySpecializedClassTemplate@PAX@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN34ExplicitlySpecializedClassTemplateIPvE1fEv -// C32-DAG: declare dllimport void @_ZN34ExplicitlySpecializedClassTemplateIPvE1fEv // MS inherits DLL attributes to partial specializations. 
template struct __declspec(dllimport) PartiallySpecializedImportedClassTemplate {}; @@ -833,7 +827,6 @@ template struct PartiallySpecializedImportedClassTemplate { voi USEMEMFUNC(PartiallySpecializedImportedClassTemplate, f); // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"?f@?$PartiallySpecializedImportedClassTemplate@PAX@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN41PartiallySpecializedImportedClassTemplateIPvE1fEv -// C32-DAG: define linkonce_odr dso_local void @_ZN41PartiallySpecializedImportedClassTemplateIPvE1fEv // Attributes on the instantiation take precedence over attributes on the template. template struct __declspec(dllexport) ExplicitlyInstantiatedWithDifferentAttr { void f() {} }; @@ -849,7 +842,6 @@ USEMEMFUNC(ExplicitInstantiationDeclImportedDefTemplate, f); // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"?f@?$ExplicitInstantiationDeclImportedDefTemplate@H@@QAEXXZ" // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc ptr @"??0?$ExplicitInstantiationDeclImportedDefTemplate@H@@QAE@XZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN44ExplicitInstantiationDeclImportedDefTemplateIiE1fEv -// C32-DAG: define weak_odr dso_local void @_ZN44ExplicitInstantiationDeclImportedDefTemplateIiE1fEv template struct __declspec(dllimport) ExplicitInstantiationDeclExportedDefImportedTemplate { void f() {} ExplicitInstantiationDeclExportedDefImportedTemplate() {} }; extern template struct __declspec(dllimport) ExplicitInstantiationDeclExportedDefImportedTemplate ; @@ -927,7 +919,6 @@ struct __declspec(dllimport) DerivedFromTemplate : public ClassTemplate {}; USEMEMFUNC(ClassTemplate, func) // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"?func@?$ClassTemplate@H@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIiE4funcEv -// C32-DAG: define linkonce_odr 
dso_local void @_ZN13ClassTemplateIiE4funcEv // PS-DAG: declare dllimport void @_ZN13ClassTemplateIiE4funcEv // ImportedTemplate is explicitly imported. @@ -935,7 +926,6 @@ struct __declspec(dllimport) DerivedFromImportedTemplate : public ImportedClassT USEMEMFUNC(ImportedClassTemplate, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ImportedClassTemplate@H@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN21ImportedClassTemplateIiE4funcEv -// C32-DAG: declare dllimport void @_ZN21ImportedClassTemplateIiE4funcEv // PS-DAG: declare dllimport void @_ZN21ImportedClassTemplateIiE4funcEv // ExportedTemplate is explicitly exported. @@ -943,7 +933,6 @@ struct __declspec(dllimport) DerivedFromExportedTemplate : public ExportedClassT USEMEMFUNC(ExportedClassTemplate, func) // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$ExportedClassTemplate@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN21ExportedClassTemplateIiE4funcEv -// C32-DAG: define weak_odr dso_local dllexport void @_ZN21ExportedClassTemplateIiE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN21ExportedClassTemplateIiE4funcEv // Base class already implicitly instantiated without attribute. @@ -952,7 +941,6 @@ struct __declspec(dllimport) DerivedFromTemplateD2 : public ClassTemplate, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ClassTemplate@N@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIdE4funcEv -// C32-DAG: define linkonce_odr dso_local void @_ZN13ClassTemplateIdE4funcEv // PS-DAG: declare dllimport void @_ZN13ClassTemplateIdE4funcEv // MS: Base class already instantiated with dfferent attribute. 
@@ -961,7 +949,6 @@ struct __declspec(dllimport) DerivedFromTemplateB2 : public ClassTemplate USEMEMFUNC(ClassTemplate, func) // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$ClassTemplate@_N@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN13ClassTemplateIbE4funcEv -// C32-DAG: define linkonce_odr dso_local void @_ZN13ClassTemplateIbE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN13ClassTemplateIbE4funcEv // Base class already specialized without dll attribute. @@ -969,7 +956,6 @@ struct __declspec(dllimport) DerivedFromExplicitlySpecializedTemplate : public E USEMEMFUNC(ExplicitlySpecializedTemplate, func) // M32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"?func@?$ExplicitlySpecializedTemplate@H@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv -// C32-DAG: define linkonce_odr dso_local void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv // PS-DAG: define linkonce_odr void @_ZN29ExplicitlySpecializedTemplateIiE4funcEv // Base class alredy specialized with export attribute. @@ -977,7 +963,6 @@ struct __declspec(dllimport) DerivedFromExplicitlyExportSpecializedTemplate : pu USEMEMFUNC(ExplicitlyExportSpecializedTemplate, func) // M32-DAG: define dso_local dllexport x86_thiscallcc void @"?func@?$ExplicitlyExportSpecializedTemplate@H@@QAEXXZ" // G32-DAG: define dso_local dllexport x86_thiscallcc void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv -// C32-DAG: define dso_local dllexport void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv // PS-DAG: define dso_local dllexport void @_ZN35ExplicitlyExportSpecializedTemplateIiE4funcEv // Base class already specialized with import attribute. 
@@ -985,7 +970,6 @@ struct __declspec(dllimport) DerivedFromExplicitlyImportSpecializedTemplate : pu USEMEMFUNC(ExplicitlyImportSpecializedTemplate, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ExplicitlyImportSpecializedTemplate@H@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN35ExplicitlyImportSpecializedTemplateIiE4funcEv -// C32-DAG: declare dllimport void @_ZN35ExplicitlyImportSpecializedTemplateIiE4funcEv // PS-DAG: declare dllimport void @_ZN35ExplicitlyImportSpecializedTemplateIiE4funcEv // Base class already instantiated without dll attribute. @@ -993,7 +977,6 @@ struct __declspec(dllimport) DerivedFromExplicitlyInstantiatedTemplate : public USEMEMFUNC(ExplicitlyInstantiatedTemplate, func) // M32-DAG: define weak_odr dso_local x86_thiscallcc void @"?func@?$ExplicitlyInstantiatedTemplate@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv -// C32-DAG: define weak_odr dso_local void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv // PS-DAG: define weak_odr void @_ZN30ExplicitlyInstantiatedTemplateIiE4funcEv // Base class already instantiated with export attribute. @@ -1001,7 +984,6 @@ struct __declspec(dllimport) DerivedFromExplicitlyExportInstantiatedTemplate : p USEMEMFUNC(ExplicitlyExportInstantiatedTemplate, func) // M32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @"?func@?$ExplicitlyExportInstantiatedTemplate@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local dllexport x86_thiscallcc void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv -// C32-DAG: define weak_odr dso_local dllexport void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv // PS-DAG: define weak_odr dllexport void @_ZN36ExplicitlyExportInstantiatedTemplateIiE4funcEv // Base class already instantiated with import attribute. 
@@ -1009,7 +991,6 @@ struct __declspec(dllimport) DerivedFromExplicitlyImportInstantiatedTemplate : p USEMEMFUNC(ExplicitlyImportInstantiatedTemplate, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ExplicitlyImportInstantiatedTemplate@H@@QAEXXZ" // G32-DAG: declare dllimport x86_thiscallcc void @_ZN36ExplicitlyImportInstantiatedTemplateIiE4funcEv -// C32-DAG: declare dllimport void @_ZN36ExplicitlyImportInstantiatedTemplateIiE4funcEv // PS-DAG: declare dllimport void @_ZN36ExplicitlyImportInstantiatedTemplateIiE4funcEv // MS: A dll attribute propagates through multiple levels of instantiation. @@ -1019,7 +1000,6 @@ struct __declspec(dllimport) BottomClass : public MiddleClass { }; USEMEMFUNC(TopClass, func) // M32-DAG: {{declare|define available_externally}} dllimport x86_thiscallcc void @"?func@?$TopClass@H@@QAEXXZ" // G32-DAG: define linkonce_odr dso_local x86_thiscallcc void @_ZN8TopClassIiE4funcEv -// C32-DAG: define linkonce_odr dso_local void @_ZN8TopClassIiE4funcEv // PS-DAG: declare dllimport void @_ZN8TopClassIiE4funcEv template struct ExplicitInstantiationDeclTemplateBase { void func() {} }; @@ -1029,7 +1009,6 @@ template struct ExplicitInstantiationDeclTemplateBase; USEMEMFUNC(ExplicitInstantiationDeclTemplateBase, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ExplicitInstantiationDeclTemplateBase@H@@QAEXXZ" // G32-DAG: define weak_odr dso_local x86_thiscallcc void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv -// C32-DAG: define weak_odr dso_local void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv // PS-DAG: declare dllimport void @_ZN37ExplicitInstantiationDeclTemplateBaseIiE4funcEv template struct ExplicitInstantiationDeclTemplateBase2 { void func() {} }; @@ -1039,7 +1018,6 @@ template struct __declspec(dllexport) ExplicitInstantiationDeclTemplateBase2, func) // M32-DAG: declare dllimport x86_thiscallcc void @"?func@?$ExplicitInstantiationDeclTemplateBase2@H@@QAEXXZ" // G32-DAG: define weak_odr 
dso_local x86_thiscallcc void @_ZN38ExplicitInstantiationDeclTemplateBase2IiE4funcEv -// C32-DAG: define weak_odr dso_local void @_ZN38ExplicitInstantiationDeclTemplateBase2IiE4funcEv // PS-DAG: declare dllimport void @_ZN38ExplicitInstantiationDeclTemplateBase2IiE4funcEv namespace pr39496 { diff --git a/clang/test/CodeGenCXX/dso-local-executable.cpp b/clang/test/CodeGenCXX/dso-local-executable.cpp index 2be6812efc3a5..d021a6a06f0f0 100644 --- a/clang/test/CodeGenCXX/dso-local-executable.cpp +++ b/clang/test/CodeGenCXX/dso-local-executable.cpp @@ -1,7 +1,6 @@ // RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -O1 -disable-llvm-passes -emit-llvm %s -o - | FileCheck --check-prefix=STATIC %s // RUN: %clang_cc1 -triple x86_64-pc-linux -mrelocation-model static -fno-plt -O1 -disable-llvm-passes -emit-llvm %s -o - | FileCheck --check-prefix=NOPLT %s // RUN: %clang_cc1 -triple x86_64-w64-mingw32 -O1 -disable-llvm-passes -emit-llvm %s -o - | FileCheck --check-prefix=MINGW %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -O1 -disable-llvm-passes -emit-llvm %s -o - | FileCheck --check-prefix=MINGW %s // STATIC-DAG: @_ZTV1C = linkonce_odr dso_local unnamed_addr constant // STATIC-DAG: @_ZTS1C = linkonce_odr dso_local constant diff --git a/clang/test/CodeGenCXX/mingw-template-dllexport.cpp b/clang/test/CodeGenCXX/mingw-template-dllexport.cpp index de112d6da53db..408a3fd0a77dd 100644 --- a/clang/test/CodeGenCXX/mingw-template-dllexport.cpp +++ b/clang/test/CodeGenCXX/mingw-template-dllexport.cpp @@ -1,7 +1,4 @@ // RUN: %clang_cc1 -emit-llvm -triple i686-mingw32 %s -o - | FileCheck %s -// RUN: %clang_cc1 -emit-llvm -triple x86_64-w64-mingw32 %s -o - | FileCheck %s -// RUN: %clang_cc1 -emit-llvm -triple i686-pc-cygwin %s -o - | FileCheck %s -// RUN: %clang_cc1 -emit-llvm -triple x86_64-pc-cygwin %s -o - | FileCheck %s #define JOIN2(x, y) x##y #define JOIN(x, y) JOIN2(x, y) diff --git a/clang/test/CodeGenCXX/rtti-mingw64.cpp 
b/clang/test/CodeGenCXX/rtti-mingw64.cpp index 9de280602945d..e0a4607cf28c3 100644 --- a/clang/test/CodeGenCXX/rtti-mingw64.cpp +++ b/clang/test/CodeGenCXX/rtti-mingw64.cpp @@ -1,6 +1,4 @@ // RUN: %clang_cc1 -triple x86_64-windows-gnu %s -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin %s -emit-llvm -o - | FileCheck %s - struct A { int a; }; struct B : virtual A { int b; }; B b; diff --git a/clang/test/CodeGenCXX/virt-dtor-key.cpp b/clang/test/CodeGenCXX/virt-dtor-key.cpp index cd169ab01dc8b..d3b9ab3351518 100644 --- a/clang/test/CodeGenCXX/virt-dtor-key.cpp +++ b/clang/test/CodeGenCXX/virt-dtor-key.cpp @@ -1,9 +1,5 @@ -// RUN: %clang_cc1 -triple i686-linux -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-MINGW -// RUN: %clang_cc1 -triple x86_64-windows-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-MINGW -// RUN: %clang_cc1 -triple i686-pc-cygwin -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-MINGW -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-MINGW - +// RUN: %clang_cc1 -triple i386-linux -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-windows-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix CHECK-MINGW // CHECK: @_ZTI3foo ={{.*}} constant // CHECK-MINGW: @_ZTI3foo = linkonce_odr class foo { diff --git a/clang/test/CodeGenCXX/vtable-key-function-ios.cpp b/clang/test/CodeGenCXX/vtable-key-function-ios.cpp index b11d0a62a04a6..43abfb62c73a6 100644 --- a/clang/test/CodeGenCXX/vtable-key-function-ios.cpp +++ b/clang/test/CodeGenCXX/vtable-key-function-ios.cpp @@ -3,8 +3,6 @@ // RUN: %clang_cc1 %s -triple=x86_64-pc-windows-gnu -emit-llvm -o - | FileCheck -check-prefixes=CHECK,CHECK-MINGW %s // RUN: %clang_cc1 %s -triple=x86_64-pc-windows-gnu -emit-llvm -o - | FileCheck -check-prefix=CHECK-LATE %s -// RUN: %clang_cc1 %s -triple=x86_64-pc-cygwin -emit-llvm -o - | 
FileCheck -check-prefixes=CHECK,CHECK-MINGW %s -// RUN: %clang_cc1 %s -triple=x86_64-pc-cygwin -emit-llvm -o - | FileCheck -check-prefix=CHECK-LATE %s // The 'a' variants ask for the vtable first. // The 'b' variants ask for the vtable second. diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl index 1d451acfc6214..b798c2a6d6c4b 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl +++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl @@ -33,7 +33,7 @@ void SecondEntry() {} // Verify the constructor is alwaysinline // NOINLINE: ; Function Attrs: {{.*}}alwaysinline -// NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ejijj({{.*}} [[CtorAttr:\#[0-9]+]] +// NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} [[CtorAttr:\#[0-9]+]] // NOINLINE: ; Function Attrs: {{.*}}alwaysinline // NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]] diff --git a/clang/test/CodeGenHLSL/builtins/ByteAddressBuffers-constructors.hlsl b/clang/test/CodeGenHLSL/builtins/ByteAddressBuffers-constructors.hlsl index 5f844fcfe4121..d7c4b03552cdc 100644 --- a/clang/test/CodeGenHLSL/builtins/ByteAddressBuffers-constructors.hlsl +++ b/clang/test/CodeGenHLSL/builtins/ByteAddressBuffers-constructors.hlsl @@ -35,19 +35,31 @@ export void foo() { // Buf1 initialization part 2 - body of ByteAddressBuffer C1 constructor with explicit binding that calls the C2 constructor // CHECK: define linkonce_odr void @_ZN4hlsl17ByteAddressBufferC1Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this, // CHECK-SAME: i32 noundef %registerNo, i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index) -// CHECK: call void @_ZN4hlsl17ByteAddressBufferC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) -// CHECK-SAME: %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}) - -// Buf2 initialization part 1 - global init 
function that calls RWByteAddressBuffer C1 constructor with implicit binding -// CHECK: define internal void @__cxx_global_var_init.1() #0 { // CHECK-NEXT: entry: -// CHECK-NEXT: call void @_ZN4hlsl19RWByteAddressBufferC1Ejijj(ptr noundef nonnull align 4 dereferenceable(4) @_ZL4Buf2, -// CHECK-SAME: i32 noundef 0, i32 noundef 1, i32 noundef 0, i32 noundef 0) +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: %registerNo.addr = alloca i32, align 4 +// CHECK-NEXT: %spaceNo.addr = alloca i32, align 4 +// CHECK-NEXT: %range.addr = alloca i32, align 4 +// CHECK-NEXT: %index.addr = alloca i32, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: store i32 %registerNo, ptr %registerNo.addr, align 4 +// CHECK-NEXT: store i32 %spaceNo, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: store i32 %range, ptr %range.addr, align 4 +// CHECK-NEXT: store i32 %index, ptr %index.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: %0 = load i32, ptr %registerNo.addr, align 4 +// CHECK-NEXT: %1 = load i32, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: %2 = load i32, ptr %range.addr, align 4 +// CHECK-NEXT: %3 = load i32, ptr %index.addr, align 4 +// CHECK: call void @_ZN4hlsl17ByteAddressBufferC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this1, +// CHECK-SAME: i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) +// CHECK-NEXT: ret void -// Buf2 initialization part 2 - body of RWByteAddressBuffer C1 constructor with implicit binding that calls the C2 constructor -// CHECK: define linkonce_odr void @_ZN4hlsl19RWByteAddressBufferC1Ejijj(ptr noundef nonnull align 4 dereferenceable(4) %this, -// CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, i32 noundef %orderId) -// CHECK: call void @_ZN4hlsl19RWByteAddressBufferC2Ejijj(ptr noundef nonnull align 4 dereferenceable(4) %this1, i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) #4 +// Buf2 initialization part 1 
- FIXME: constructor with implicit binding does not exist yet; +// the global init function currently calls the default RWByteAddressBuffer C1 constructor +// CHECK: define internal void @__cxx_global_var_init.1() +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN4hlsl19RWByteAddressBufferC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @_ZL4Buf2) // Buf3 initialization part 1 - local variable declared in function foo() is initialized by // RasterizerOrderedByteAddressBuffer C1 default constructor @@ -59,30 +71,43 @@ export void foo() { // Buf3 initialization part 2 - body of RasterizerOrderedByteAddressBuffer default C1 constructor that // calls the default C2 constructor // CHECK: define linkonce_odr void @_ZN4hlsl34RasterizerOrderedByteAddressBufferC1Ev(ptr noundef nonnull align 4 dereferenceable(4) %this) -// CHECK: call void @_ZN4hlsl34RasterizerOrderedByteAddressBufferC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %{{.*}}) +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK: call void @_ZN4hlsl34RasterizerOrderedByteAddressBufferC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %this1) // CHECK-NEXT: ret void // Buf1 initialization part 3 - ByteAddressBuffer C2 constructor with explicit binding that initializes // handle with @llvm.dx.resource.handlefrombinding // CHECK: define linkonce_odr void @_ZN4hlsl17ByteAddressBufferC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this, // CHECK-SAME: i32 noundef %registerNo, i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index) -// CHECK-DXIL: %[[HANDLE:.*]] = call target("dx.RawBuffer", i8, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0t( -// CHECK-DXIL-SAME: i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false) -// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::ByteAddressBuffer", ptr %{{.*}}, i32 0, i32 
0 -// CHECK-DXIL-NEXT: store target("dx.RawBuffer", i8, 0, 0) %[[HANDLE]], ptr %__handle, align 4 - -// Buf2 initialization part 3 - body of RWByteAddressBuffer C2 constructor with implicit binding that initializes -// handle with @llvm.dx.resource.handlefromimplicitbinding -// CHECK: define linkonce_odr void @_ZN4hlsl19RWByteAddressBufferC2Ejijj(ptr noundef nonnull align 4 dereferenceable(4) %this, -// CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, i32 noundef %orderId) unnamed_addr #1 align 2 { -// CHECK: %[[HANDLE:.*]] = call target("dx.RawBuffer", i8, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_i8_1_0t(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false) -// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::RWByteAddressBuffer", ptr %this1, i32 0, i32 0 -// CHECK-NEXT: store target("dx.RawBuffer", i8, 1, 0) %[[HANDLE]], ptr %__handle, align 4 +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: %registerNo.addr = alloca i32, align 4 +// CHECK-NEXT: %spaceNo.addr = alloca i32, align 4 +// CHECK-NEXT: %range.addr = alloca i32, align 4 +// CHECK-NEXT: %index.addr = alloca i32, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: store i32 %registerNo, ptr %registerNo.addr, align 4 +// CHECK-NEXT: store i32 %spaceNo, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: store i32 %range, ptr %range.addr, align 4 +// CHECK-NEXT: store i32 %index, ptr %index.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: %0 = load i32, ptr %registerNo.addr, align 4 +// CHECK-NEXT: %1 = load i32, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: %2 = load i32, ptr %range.addr, align 4 +// CHECK-NEXT: %3 = load i32, ptr %index.addr, align 4 +// CHECK-DXIL-NEXT: %4 = call target("dx.RawBuffer", i8, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0t( +// CHECK-DXIL-SAME: i32 %1, i32 %0, i32 %2, i32 %3, i1 false) +// 
CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::ByteAddressBuffer", ptr %this1, i32 0, i32 0 +// CHECK-DXIL-NEXT: store target("dx.RawBuffer", i8, 0, 0) %4, ptr %__handle, align 4 +// CHECK-NEXT: ret void // Buf3 initialization part 3 - body of RasterizerOrderedByteAddressBuffer default C2 constructor that // initializes handle to poison // CHECK: define linkonce_odr void @_ZN4hlsl34RasterizerOrderedByteAddressBufferC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedByteAddressBuffer", ptr %{{.*}}, i32 0, i32 0 +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RasterizerOrderedByteAddressBuffer", ptr %this1, i32 0, i32 0 // CHECK: store target("dx.RawBuffer", i8, 1, 1) poison, ptr %__handle, align 4 // Module initialization diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl index ad8ebdf7d8c85..adf231dedf4cb 100644 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl @@ -35,19 +35,31 @@ export void foo() { // Buf1 initialization part 2 - body of RWBuffer C1 constructor with explicit binding that calls the C2 constructor // CHECK: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC1Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this, // CHECK-SAME: i32 noundef %registerNo, i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index) -// CHECK: call void @_ZN4hlsl8RWBufferIfEC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) -// CHECK-SAME: %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}) - -// Buf2 initialization part 1 - global init function that calls RWBuffer C1 constructor with implicit binding -// CHECK: define internal void @__cxx_global_var_init.1() // CHECK-NEXT: entry: -// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIdEC1Ejijj(ptr noundef 
nonnull align 4 dereferenceable(4) @_ZL4Buf2, -// CHECK-SAME: i32 noundef 0, i32 noundef 1, i32 noundef 0, i32 noundef 0) - -// Buf2 initialization part 2 - body of RWBuffer C1 constructor with implicit binding that calls the C2 constructor -// CHECK: define linkonce_odr void @_ZN4hlsl8RWBufferIdEC1Ejijj(ptr noundef nonnull align 4 dereferenceable(4) %this, -// CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, i32 noundef %orderId) -// CHECK: call void @_ZN4hlsl8RWBufferIdEC2Ejijj(ptr noundef nonnull align 4 dereferenceable(4) %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}) #4 +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: %registerNo.addr = alloca i32, align 4 +// CHECK-NEXT: %spaceNo.addr = alloca i32, align 4 +// CHECK-NEXT: %range.addr = alloca i32, align 4 +// CHECK-NEXT: %index.addr = alloca i32, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: store i32 %registerNo, ptr %registerNo.addr, align 4 +// CHECK-NEXT: store i32 %spaceNo, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: store i32 %range, ptr %range.addr, align 4 +// CHECK-NEXT: store i32 %index, ptr %index.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: %0 = load i32, ptr %registerNo.addr, align 4 +// CHECK-NEXT: %1 = load i32, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: %2 = load i32, ptr %range.addr, align 4 +// CHECK-NEXT: %3 = load i32, ptr %index.addr, align 4 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this1, +// CHECK-SAME: i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) +// CHECK-NEXT: ret void + +// Buf2 initialization part 1 - FIXME: constructor with implicit binding does not exist yet; +// the global init function currently calls the default RWBuffer C1 constructor +// CHECK: define internal void @__cxx_global_var_init.1() #0 { +// CHECK-NEXT: entry: +// CHECK-NEXT:
call void @_ZN4hlsl8RWBufferIdEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @_ZL4Buf2) // Buf3 initialization part 1 - local variable declared in function foo() is initialized by RWBuffer C1 default constructor // CHECK: define void @_Z3foov() @@ -57,29 +69,48 @@ export void foo() { // Buf3 initialization part 2 - body of RWBuffer default C1 constructor that calls the default C2 constructor // CHECK: define linkonce_odr void @_ZN4hlsl8RWBufferIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) %this) -// CHECK: call void @_ZN4hlsl8RWBufferIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %{{.*}}) +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %this1) +// CHECK-NEXT: ret void // Buf1 initialization part 3 - body of RWBuffer C2 constructor with explicit binding that initializes // handle with @llvm.dx.resource.handlefrombinding // CHECK: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this, // CHECK-SAME: i32 noundef %registerNo, i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index) -// CHECK-DXIL: %[[HANDLE:.*]] = call target("dx.TypedBuffer", float, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_1_0_0t( -// CHECK-DXIL-SAME: i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false) -// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %{{.*}}, i32 0, i32 0 -// CHECK-DXIL-NEXT: store target("dx.TypedBuffer", float, 1, 0, 0) %[[HANDLE]], ptr %__handle, align 4 - -// Buf2 initialization part 3 - body of RWBuffer C2 constructor with implicit binding that initializes -// handle with @llvm.dx.resource.handlefromimplicitbinding -// CHECK: define linkonce_odr void @_ZN4hlsl8RWBufferIdEC2Ejijj(ptr noundef nonnull 
align 4 dereferenceable(4) %this, -// CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, i32 noundef %orderId) unnamed_addr #1 align 2 { -// CHECK: %[[HANDLE:.*]] = call target("dx.TypedBuffer", double, 1, 0, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.TypedBuffer_f64_1_0_0t(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false) -// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.0", ptr %{{.*}}, i32 0, i32 0 -// CHECK-NEXT: store target("dx.TypedBuffer", double, 1, 0, 0) %[[HANDLE]], ptr %__handle, align 4 +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: %registerNo.addr = alloca i32, align 4 +// CHECK-NEXT: %spaceNo.addr = alloca i32, align 4 +// CHECK-NEXT: %range.addr = alloca i32, align 4 +// CHECK-NEXT: %index.addr = alloca i32, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: store i32 %registerNo, ptr %registerNo.addr, align 4 +// CHECK-NEXT: store i32 %spaceNo, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: store i32 %range, ptr %range.addr, align 4 +// CHECK-NEXT: store i32 %index, ptr %index.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: %0 = load i32, ptr %registerNo.addr, align 4 +// CHECK-NEXT: %1 = load i32, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: %2 = load i32, ptr %range.addr, align 4 +// CHECK-NEXT: %3 = load i32, ptr %index.addr, align 4 +// CHECK-DXIL-NEXT: %4 = call target("dx.TypedBuffer", float, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_1_0_0t( +// CHECK-DXIL-SAME: i32 %1, i32 %0, i32 %2, i32 %3, i1 false) +// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer", ptr %this1, i32 0, i32 0 +// CHECK-DXIL-NEXT: store target("dx.TypedBuffer", float, 1, 0, 0) %4, ptr %__handle, align 4 +// CHECK-NEXT: ret void // Buf3 initialization part 3 - body of RWBuffer default C2 constructor that initializes handle to poison // CHECK: define 
linkonce_odr void @_ZN4hlsl8RWBufferIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.1", ptr %{{.*}}, i32 0, i32 0 +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::RWBuffer.1", ptr %this1, i32 0, i32 0 // CHECK-NEXT: store target("dx.TypedBuffer", i32, 1, 0, 1) poison, ptr %__handle, align 4 +// CHECK-NEXT: ret void // Module initialization // CHECK: define internal void @_GLOBAL__sub_I_RWBuffer_constructor.hlsl() diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-constructors.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-constructors.hlsl index 34ce676a02f83..ea818a737cf74 100644 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffers-constructors.hlsl +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffers-constructors.hlsl @@ -36,21 +36,31 @@ export void foo() { // that calls the C2 constructor // CHECK: define linkonce_odr void @_ZN4hlsl16StructuredBufferIfEC1Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this, // CHECK-SAME: i32 noundef %registerNo, i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index) -// CHECK: call void @_ZN4hlsl16StructuredBufferIfEC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) -// CHECK-SAME: %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}) - -// Buf2 initialization part 1 - global init function that calls RWStructuredBuffer C1 constructor with -// implicit binding +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: %registerNo.addr = alloca i32, align 4 +// CHECK-NEXT: %spaceNo.addr = alloca i32, align 4 +// CHECK-NEXT: %range.addr = alloca i32, align 4 +// CHECK-NEXT: %index.addr = alloca i32, align 4 +// CHECK-NEXT: store 
ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: store i32 %registerNo, ptr %registerNo.addr, align 4 +// CHECK-NEXT: store i32 %spaceNo, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: store i32 %range, ptr %range.addr, align 4 +// CHECK-NEXT: store i32 %index, ptr %index.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: %0 = load i32, ptr %registerNo.addr, align 4 +// CHECK-NEXT: %1 = load i32, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: %2 = load i32, ptr %range.addr, align 4 +// CHECK-NEXT: %3 = load i32, ptr %index.addr, align 4 +// CHECK: call void @_ZN4hlsl16StructuredBufferIfEC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this1, +// CHECK-SAME: i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) +// CHECK-NEXT: ret void + +// Buf2 initialization part 1 - FIXME: constructor with implicit binding does not exist yet; +// the global init function currently calls the default RWStructuredBufer C1 constructor // CHECK: define internal void @__cxx_global_var_init.1() // CHECK-NEXT: entry: -// CHECK-NEXT: call void @_ZN4hlsl18RWStructuredBufferIfEC1Ejijj(ptr noundef nonnull align 4 dereferenceable(4) @_ZL4Buf2, -// CHECK-SAME: i32 noundef 0, i32 noundef 1, i32 noundef 0, i32 noundef 0) - -// Buf2 initialization part 2 - body of RWStructuredBuffer C1 constructor with implicit binding that calls the C2 constructor -// CHECK: define linkonce_odr void @_ZN4hlsl18RWStructuredBufferIfEC1Ejijj(ptr noundef nonnull align 4 dereferenceable(4) %this, -// CHECK-SAME: i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index, i32 noundef %orderId) -// CHECK: call void @_ZN4hlsl18RWStructuredBufferIfEC2Ejijj(ptr noundef nonnull align 4 dereferenceable(4) -// CHECK-SAME; %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}) #4 +// CHECK-NEXT: call void @_ZN4hlsl18RWStructuredBufferIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @_ZL4Buf2) // Buf3 initialization part 1 - local 
variable declared in function foo() is initialized by // AppendStructuredBuffer C1 default constructor @@ -62,29 +72,47 @@ export void foo() { // Buf3 initialization part 2 - body of AppendStructuredBuffer default C1 constructor that calls // the default C2 constructor // CHECK: define linkonce_odr void @_ZN4hlsl22AppendStructuredBufferIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) %this) -// CHECK: call void @_ZN4hlsl22AppendStructuredBufferIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %{{.*}}) +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK: call void @_ZN4hlsl22AppendStructuredBufferIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %this1) +// CHECK-NEXT: ret void // Buf1 initialization part 3 - body of AppendStructuredBuffer C2 constructor with explicit binding // that initializes handle with @llvm.dx.resource.handlefrombinding // CHECK: define linkonce_odr void @_ZN4hlsl16StructuredBufferIfEC2Ejjij(ptr noundef nonnull align 4 dereferenceable(4) %this, // CHECK-SAME: i32 noundef %registerNo, i32 noundef %spaceNo, i32 noundef %range, i32 noundef %index) -// CHECK-DXIL: %[[HANDLE:.*]] = call target("dx.RawBuffer", float, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0t( -// CHECK-SAME: i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false) -// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::StructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// CHECK-DXIL-NEXT: store target("dx.RawBuffer", float, 0, 0) %[[HANDLE]], ptr %__handle, align 4 - -// Buf2 initialization part 3 - body of RWStructuredBuffer C2 constructor with implicit binding that initializes -// handle with @llvm.dx.resource.handlefromimplicitbinding -// CHECK: define linkonce_odr void @_ZN4hlsl18RWStructuredBufferIfEC2Ejijj(ptr noundef nonnull align 4 dereferenceable(4) %this, -// CHECK-SAME: i32 
noundef %spaceNo, i32 noundef %range, i32 noundef %index, i32 noundef %orderId) unnamed_addr #1 align 2 { -// CHECK: %[[HANDLE:.*]] = call target("dx.RawBuffer", float, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f32_1_0t(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false) -// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::RWStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 -// CHECK-NEXT: store target("dx.RawBuffer", float, 1, 0) %[[HANDLE]], ptr %__handle, align 4 +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: %registerNo.addr = alloca i32, align 4 +// CHECK-NEXT: %spaceNo.addr = alloca i32, align 4 +// CHECK-NEXT: %range.addr = alloca i32, align 4 +// CHECK-NEXT: %index.addr = alloca i32, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: store i32 %registerNo, ptr %registerNo.addr, align 4 +// CHECK-NEXT: store i32 %spaceNo, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: store i32 %range, ptr %range.addr, align 4 +// CHECK-NEXT: store i32 %index, ptr %index.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT: %0 = load i32, ptr %registerNo.addr, align 4 +// CHECK-NEXT: %1 = load i32, ptr %spaceNo.addr, align 4 +// CHECK-NEXT: %2 = load i32, ptr %range.addr, align 4 +// CHECK-NEXT: %3 = load i32, ptr %index.addr, align 4 +// CHECK-DXIL-NEXT: %4 = call target("dx.RawBuffer", float, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0t( +// CHECK-DXIL-SAME: i32 %1, i32 %0, i32 %2, i32 %3, i1 false) +// CHECK-NEXT: %__handle = getelementptr inbounds nuw %"class.hlsl::StructuredBuffer", ptr %this1, i32 0, i32 0 +// CHECK-DXIL-NEXT: store target("dx.RawBuffer", float, 0, 0) %4, ptr %__handle, align 4 +// CHECK-NEXT: ret void // Buf3 initialization part 3 - body of AppendStructuredBuffer default C2 constructor that // initializes handle to poison // CHECK: define linkonce_odr void 
@_ZN4hlsl22AppendStructuredBufferIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) %this) -// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::AppendStructuredBuffer", ptr %{{.*}}, i32 0, i32 0 +// CHECK-NEXT: entry: +// CHECK-NEXT: %this.addr = alloca ptr, align 4 +// CHECK-NEXT: store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT: %this1 = load ptr, ptr %this.addr, align 4 +// CHECK: %__handle = getelementptr inbounds nuw %"class.hlsl::AppendStructuredBuffer", ptr %this1, i32 0, i32 0 // CHECK: store target("dx.RawBuffer", float, 1, 0) poison, ptr %__handle, align 4 // Module initialization diff --git a/clang/test/CodeGenHLSL/static-local-ctor.hlsl b/clang/test/CodeGenHLSL/static-local-ctor.hlsl index 474bcf1aff6ac..7aeb5e987d6b2 100644 --- a/clang/test/CodeGenHLSL/static-local-ctor.hlsl +++ b/clang/test/CodeGenHLSL/static-local-ctor.hlsl @@ -21,7 +21,7 @@ void InitBuf(RWBuffer buf) { // CHECK-NEXT: br i1 [[Tmp3]] // CHECK-NOT: _Init_thread_header // CHECK: init.check: -// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIiEC1Ejijj +// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIiEC1Ev // CHECK-NEXT: store i8 1, ptr @_ZGVZ4mainvE5mybuf // CHECK-NOT: _Init_thread_footer diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 1f696aba8d088..24d64c94c0956 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -317,9 +317,6 @@ // RUN: not %clang --target=aarch64-linux -fsanitize=memtag -I +mte %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-NOMT-1 // CHECK-SANMT-NOMT-1: '-fsanitize=memtag-stack' requires hardware support (+memtag) -// RUN: not %clang --target=aarch64-linux-android31 -fsanitize-trap=memtag -march=armv8-a+memtag -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-SANMT-TRAP -// CHECK-SANMT-TRAP: error: unsupported argument 'memtag' to option '-fsanitize-trap=' - // RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-after-scope %s -### 2>&1 | FileCheck %s 
--check-prefix=CHECK-USE-AFTER-SCOPE // RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fsanitize-address-use-after-scope -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-USE-AFTER-SCOPE // CHECK-USE-AFTER-SCOPE: -cc1{{.*}}-fsanitize-address-use-after-scope diff --git a/clang/test/Driver/ppc-mrop-protection-support-check.c b/clang/test/Driver/ppc-mrop-protection-support-check.c index f500e9e3e510c..50eaef3ed770b 100644 --- a/clang/test/Driver/ppc-mrop-protection-support-check.c +++ b/clang/test/Driver/ppc-mrop-protection-support-check.c @@ -1,15 +1,20 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=power10 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP +// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=pwr9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP +// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=power9 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP +// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ +// RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP +// RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=power8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=HASROP // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr7 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=NOROP - -// RUN: not %clang -target powerpc-unknown-linux -fsyntax-only \ -// RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT -// RUN: not %clang -target powerpc-unknown-aix -fsyntax-only \ -// RUN: -mcpu=pwr8 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=32BIT +// RUN: not %clang -target powerpc64le-unknown-linux-gnu 
-fsyntax-only \ +// RUN: -mcpu=power7 -mrop-protect %s 2>&1 | FileCheck %s --check-prefix=NOROP #ifdef __ROP_PROTECT__ static_assert(false, "ROP Protect enabled"); @@ -19,4 +24,3 @@ static_assert(false, "ROP Protect enabled"); // HASROP-NOT: option '-mrop-protect' cannot be specified with // NOROP: option '-mrop-protect' cannot be specified with -// 32BIT: option '-mrop-protect' cannot be specified on this target diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index bd3c1b7de743a..f7d4ecb057d6e 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -10,7 +10,6 @@ // CHECK-NEXT: a 2.1 'A' (Atomic Instructions) // CHECK-NEXT: f 2.2 'F' (Single-Precision Floating-Point) // CHECK-NEXT: d 2.2 'D' (Double-Precision Floating-Point) -// CHECK-NEXT: q 2.2 'Q' (Quad-Precision Floating-Point) // CHECK-NEXT: c 2.0 'C' (Compressed Instructions) // CHECK-NEXT: b 1.0 'B' (the collection of the Zba, Zbb, Zbs extensions) // CHECK-NEXT: v 1.0 'V' (Vector Extension for Application Processors) @@ -158,7 +157,6 @@ // CHECK-NEXT: svpbmt 1.0 'Svpbmt' (Page-Based Memory Types) // CHECK-NEXT: svvptc 1.0 'Svvptc' (Obviating Memory-Management Instructions after Marking PTEs Valid) // CHECK-NEXT: xandesperf 5.0 'XAndesPerf' (Andes Performance Extension) -// CHECK-NEXT: xandesvdot 5.0 'XAndesVDot' (Andes Vector Dot Product Extension) // CHECK-NEXT: xandesvpackfph 5.0 'XAndesVPackFPH' (Andes Vector Packed FP16 Extension) // CHECK-NEXT: xcvalu 1.0 'XCValu' (CORE-V ALU Operations) // CHECK-NEXT: xcvbi 1.0 'XCVbi' (CORE-V Immediate Branching) diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 1da8311b5de98..018fa25218ea6 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -10,8 +10,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32imafd 
-### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv32-unknown-elf -march=rv32imafdq -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32ic -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -23,8 +21,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32imafdc -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv32-unknown-elf -march=rv32imafdqc -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32ia -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -32,8 +28,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32iafd -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv32-unknown-elf -march=rv32iafdq -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32iac -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -41,8 +35,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32iafdc -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv32-unknown-elf -march=rv32iafdqc -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv32-unknown-elf -march=rv32g -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -88,8 +80,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64imafd -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv64-unknown-elf -march=rv64imafdq -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64ic -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -101,8 +91,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64imafdc 
-### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv64-unknown-elf -march=rv64imafdqc -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64ia -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -110,8 +98,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64iafd -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv64-unknown-elf -march=rv64iafdq -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64iac -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -119,8 +105,6 @@ // RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64iafdc -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s -// RUN: %clang --target=riscv64-unknown-elf -march=rv64iafdqc -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck %s // RUN: %clang --target=riscv64-unknown-elf -march=rv64g -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck %s @@ -227,6 +211,11 @@ // RV32-LETTER: error: invalid arch name 'rv32q', // RV32-LETTER: first letter after 'rv32' should be 'e', 'i' or 'g' +// RUN: not %clang --target=riscv32-unknown-elf -march=rv32imcq -### %s \ +// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ORDER %s +// RV32-ORDER: error: invalid arch name 'rv32imcq', +// RV32-ORDER: unsupported standard user-level extension 'q' + // RUN: not %clang --target=riscv32-unknown-elf -march=rv32izvl64b -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ZVL64B-ER %s // RV32-ZVL64B-ER: error: invalid arch name 'rv32izvl64b', @@ -237,6 +226,11 @@ // RV32-STD-INVAL: error: invalid arch name 'rv32imw', // RV32-STD-INVAL: invalid standard user-level extension 'w' +// RUN: not %clang --target=riscv32-unknown-elf -march=rv32imqc -### %s \ +// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-STD %s +// RV32-STD: error: invalid arch 
name 'rv32imqc', +// RV32-STD: unsupported standard user-level extension 'q' + // RUN: not %clang --target=riscv32-unknown-elf -march=rv32xabc -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32X %s // RV32X: error: invalid arch name 'rv32xabc', diff --git a/clang/test/Headers/__clang_hip_cmath-return_types.hip b/clang/test/Headers/__clang_hip_cmath-return_types.hip deleted file mode 100644 index 146235244c45f..0000000000000 --- a/clang/test/Headers/__clang_hip_cmath-return_types.hip +++ /dev/null @@ -1,1023 +0,0 @@ -// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ -// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ -// RUN: -internal-isystem %S/Inputs/include \ -// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ -// RUN: -target-cpu gfx906 %s -fcuda-is-device -fsyntax-only -o - - -template -struct is_same { - static constexpr bool value = false; -}; - -template -struct is_same { - static constexpr bool value = true; -}; - -__device__ void test_abs() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_acos() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_asin() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - 
-__device__ void test_atan() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_atan2() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_ceil() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_cos() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_cosh() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - 
static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_exp() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_fabs() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_floor() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_fmod() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} 
- -__device__ void test_frexp() -{ - int ip; - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_ldexp() -{ - int ip = 1; - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_log() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_log10() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_modf() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - double i; -} - -__device__ void test_pow() -{ - 
static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_sin() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_sinh() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_sqrt() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_tan() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - 
static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_tanh() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_signbit() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_fpclassify() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isfinite() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isnormal() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isgreater() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isgreaterequal() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isinf() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isless() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - 
static_assert(is_same::value, ""); -} - -__device__ void test_islessequal() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_islessgreater() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isnan() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_isunordered() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_acosh() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_asinh() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_atanh() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - 
static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_cbrt() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_copysign() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_erf() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_erfc() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_exp2() -{ 
- static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_expm1() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_fdim() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_fma() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - - static_assert(is_same::value, ""); -} - -__device__ void test_fmax() 
-{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_fmin() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_hypot() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_ilogb() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_lgamma() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, 
""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_llrint() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_llround() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_log1p() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_log2() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void 
test_logb() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_lrint() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_lround() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_nan() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_nearbyint() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_nextafter() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - 
static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_remainder() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_remquo() -{ - int ip; - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_rint() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_round() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - 
static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_scalbln() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_scalbn() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_tgamma() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__device__ void test_trunc() -{ - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); -} - -__global__ void tests() -{ - test_abs(); - test_acos(); - 
test_asin(); - test_atan(); - test_atan2(); - test_ceil(); - test_cos(); - test_cosh(); - test_exp(); - test_fabs(); - test_floor(); - test_fmod(); - test_frexp(); - test_ldexp(); - test_log(); - test_log10(); - test_modf(); - test_pow(); - test_sin(); - test_sinh(); - test_sqrt(); - test_tan(); - test_tanh(); - test_signbit(); - test_fpclassify(); - test_isfinite(); - test_isnormal(); - test_isgreater(); - test_isgreaterequal(); - test_isinf(); - test_isless(); - test_islessequal(); - test_islessgreater(); - test_isnan(); - test_isunordered(); - test_acosh(); - test_asinh(); - test_atanh(); - test_cbrt(); - test_copysign(); - test_erf(); - test_erfc(); - test_exp2(); - test_expm1(); - test_fdim(); - test_fma(); - test_fmax(); - test_fmin(); - test_hypot(); - test_ilogb(); - test_lgamma(); - test_llrint(); - test_llround(); - test_log1p(); - test_log2(); - test_logb(); - test_lrint(); - test_lround(); - test_nan(); - test_nearbyint(); - test_nextafter(); - test_remainder(); - test_remquo(); - test_rint(); - test_round(); - test_scalbln(); - test_scalbn(); - test_tgamma(); - test_trunc(); -} diff --git a/clang/test/Modules/pr130712.cppm b/clang/test/Modules/pr130712.cppm deleted file mode 100644 index 4c7a21ea1f289..0000000000000 --- a/clang/test/Modules/pr130712.cppm +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: split-file %s %t - -// There are two requirements here to result in the owner of a macro being null. -// 1) There must be a configuration mismatch between a header and a file it depends on -// 2) -fmodules-local-submodule-visibility must be enabled. - -// In the following example, when compiling module C, A_H has no owning module. 
- -// RUN: %clang_cc1 -I%t -emit-module -o %t/a.pcm -fmodules %t/module.modulemap -fmodule-name=a -fmodules-local-submodule-visibility -// RUN: %clang_cc1 -fexceptions -Wno-module-file-config-mismatch -I%t -emit-module -o %t/b.pcm -fmodules %t/module.modulemap -fmodule-name=b -fmodules-local-submodule-visibility -fmodule-file=%t/a.pcm -// RUN: %clang_cc1 -fexceptions -Wno-module-file-config-mismatch -I%t -emit-module -o %t/c.pcm -fmodules %t/module.modulemap -fmodule-name=c -fmodules-local-submodule-visibility -fmodule-file=%t/a.pcm -fmodule-file=%t/b.pcm - -//--- module.modulemap -module a { header "a.h" } -module b { header "b.h" } -module c { header "c.h" } - -//--- a.h -#ifndef A_H -#define A_H -#endif - -//--- b.h -#ifndef B_H -#define B_H - -#include - -#endif - -//--- c.h -#include -#include diff --git a/clang/test/OpenMP/openmp_non_c_directives.c b/clang/test/OpenMP/openmp_non_c_directives.c deleted file mode 100644 index 844d7dad551bc..0000000000000 --- a/clang/test/OpenMP/openmp_non_c_directives.c +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s - -// Test the reaction to some Fortran-only directives. - -void foo() { -#pragma omp allocators // expected-error {{expected an OpenMP directive}} -#pragma omp do // expected-error {{expected an OpenMP directive}} -#pragma omp end workshare // expected-error {{expected an OpenMP directive}} -#pragma omp parallel workshare // expected-warning {{extra tokens at the end of '#pragma omp parallel' are ignored}} -#pragma omp workshare // expected-error {{expected an OpenMP directive}} -} - diff --git a/clang/test/OpenMP/openmp_workshare.c b/clang/test/OpenMP/openmp_workshare.c new file mode 100644 index 0000000000000..0302eb19f9ef4 --- /dev/null +++ b/clang/test/OpenMP/openmp_workshare.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s + +// Workshare is a Fortran-only directive. 
+ +void foo() { +#pragma omp workshare // expected-error {{expected an OpenMP directive}} +} + diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index e3b456e0245f7..25f15cc5283f9 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -20,7 +20,6 @@ // CHECK-NOT: __riscv_m {{.*$}} // CHECK-NOT: __riscv_mul {{.*$}} // CHECK-NOT: __riscv_muldiv {{.*$}} -// CHECK-NOT: __riscv_q {{.*$}} // CHECK-NOT: __riscv_sha {{.*$}} // CHECK-NOT: __riscv_shcounterenw {{.*$}} // CHECK-NOT: __riscv_shgatpa {{.*$}} @@ -335,17 +334,6 @@ // CHECK-M-EXT: __riscv_mul 1 // CHECK-M-EXT: __riscv_muldiv 1 -// RUN: %clang --target=riscv32-unknown-linux-gnu \ -// RUN: -march=rv32ifdq -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-Q-EXT %s -// RUN: %clang --target=riscv64-unknown-linux-gnu \ -// RUN: -march=rv64ifdq -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-Q-EXT %s -// CHECK-Q-EXT: __riscv_fdiv 1 -// CHECK-Q-EXT: __riscv_flen 128 -// CHECK-Q-EXT: __riscv_fsqrt 1 -// CHECK-Q-EXT: __riscv_q 2002000{{$}} - // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32isha -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SHCOUNTERENW-EXT %s diff --git a/clang/test/Refactor/source-loc-zero.cpp b/clang/test/Refactor/source-loc-zero.cpp deleted file mode 100644 index 61b782743aece..0000000000000 --- a/clang/test/Refactor/source-loc-zero.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// Regression test for #139375 -// Clang uses 1-based indexing for source locations given from the command-line. -// Verify that `clang-refactor` rejects 0 as an invalid value for line or column number. 
- -// For range start: -// RUN: not clang-refactor local-rename -selection=%s:0:1-1:1 -new-name=test %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-DIAG %s -// RUN: not clang-refactor local-rename -selection=%s:1:0-1:1 -new-name=test %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-DIAG %s - -// For range end: -// RUN: not clang-refactor local-rename -selection=%s:1:1-0:1 -new-name=test %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-DIAG %s -// RUN: not clang-refactor local-rename -selection=%s:1:1-1:0 -new-name=test %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECK-DIAG %s - -// CHECK-DIAG: error: '-selection' option must be specified using :: or ::-: format, where and are integers greater than zero. diff --git a/clang/test/Sema/atomic-expr.c b/clang/test/Sema/atomic-expr.c index 96571e3e68c87..7e5219dd3f14a 100644 --- a/clang/test/Sema/atomic-expr.c +++ b/clang/test/Sema/atomic-expr.c @@ -114,23 +114,6 @@ void func_16(void) { (void)sizeof(xp->val); (void)sizeof(y.ival); (void)sizeof(yp->ival); - - // Also, do not diagnose in unreachable code paths. - { - if (0) { - x.val = 12; - xp->val = 12; - (void)y.ival; - (void)yp->ival; - } - - return; - - x.val = 12; - xp->val = 12; - (void)y.ival; - (void)yp->ival; - } } // Ensure that we correctly implement assignment constraints from C2x 6.5.16.1. 
diff --git a/clang/test/SemaCXX/cxx2a-consteval.cpp b/clang/test/SemaCXX/cxx2a-consteval.cpp index d9932e4dd8241..d9d144cafdbcc 100644 --- a/clang/test/SemaCXX/cxx2a-consteval.cpp +++ b/clang/test/SemaCXX/cxx2a-consteval.cpp @@ -1300,25 +1300,3 @@ void foo() { } } - -// https://github.com/llvm/llvm-project/issues/139160 -namespace GH139160{ - // original test case taken from Github - struct A {int x[1]; }; - A f(); // expected-note {{declared here}} - typedef int *t[]; - consteval int* f(int* x) { return x; } - - int ** x = (t){f(f().x)}; // expected-error {{call to consteval function 'GH139160::f' is not a constant expression}} - // expected-note@-1 {{non-constexpr function 'f' cannot be used in a constant expression}} - // expected-error@-2 {{initializer element is not a compile-time constant}} - - struct B {int value, value_two;}; - B make_struct() {return {10, 20};} // expected-note {{declared here}} - consteval int get_value(B container) {return container.value;} - B result = (B){10, get_value(make_struct())}; // expected-error {{initializer element is not a compile-time constant}} - // expected-error@-1 {{call to consteval function 'GH139160::get_value' is not a constant expression}} - // expected-note@-2 {{non-constexpr function 'make_struct' cannot be used in a constant expression}} -}; - - diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 2286da8d1c0e5..7e392213710a4 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -926,33 +926,6 @@ struct C { (&fref)(); } }; - -struct CTpl { - template - constexpr int c(this const CTpl&, T) { // #P2797-ctpl-1 - return 42; - } - - template - void c(T)&; // #P2797-ctpl-2 - - template - static void c(T = 0, T = 0); // #P2797-ctpl-3 - - void d() { - c(0); // expected-error {{call to member function 'c' is ambiguous}} - // expected-note@#P2797-ctpl-1{{candidate}} - // expected-note@#P2797-ctpl-2{{candidate}} - // 
expected-note@#P2797-ctpl-3{{candidate}} - (CTpl::c)(0); // expected-error {{call to member function 'c' is ambiguous}} - // expected-note@#P2797-ctpl-1{{candidate}} - // expected-note@#P2797-ctpl-2{{candidate}} - // expected-note@#P2797-ctpl-3{{candidate}} - - static_assert((&CTpl::c)(CTpl{}, 0) == 42); // selects #1 - } -}; - } namespace GH85992 { diff --git a/clang/test/SemaCXX/dllexport.cpp b/clang/test/SemaCXX/dllexport.cpp index f503e2fc311d1..22d92c30954e8 100644 --- a/clang/test/SemaCXX/dllexport.cpp +++ b/clang/test/SemaCXX/dllexport.cpp @@ -2,8 +2,6 @@ // RUN: %clang_cc1 -triple x86_64-win32 -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DMS %s // RUN: %clang_cc1 -triple i686-mingw32 -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s // RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s // RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++11 -Wunsupported-dll-base-class-template -DPS %s diff --git a/clang/test/SemaCXX/dllimport.cpp b/clang/test/SemaCXX/dllimport.cpp index b7a1a62b8725b..996e92f611d3f 100644 --- a/clang/test/SemaCXX/dllimport.cpp +++ b/clang/test/SemaCXX/dllimport.cpp @@ -3,9 +3,6 @@ // RUN: %clang_cc1 -triple i686-mingw32 -fsyntax-only -fms-extensions -verify -std=c++1y 
-Wunsupported-dll-base-class-template -DGNU %s // RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s // RUN: %clang_cc1 -triple x86_64-mingw32 -fsyntax-only -fms-extensions -verify -std=c++17 -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple i686-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++1y -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DGNU %s -// RUN: %clang_cc1 -triple x86_64-pc-cygwin -fsyntax-only -fms-extensions -verify -std=c++17 -Wunsupported-dll-base-class-template -DGNU %s // RUN: %clang_cc1 -triple i686-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++11 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-windows-itanium -fsyntax-only -fms-extensions -verify -std=c++17 -Wunsupported-dll-base-class-template -DWI %s // RUN: %clang_cc1 -triple x86_64-scei-ps4 -fsyntax-only -fdeclspec -verify -std=c++11 -Wunsupported-dll-base-class-template -DPS %s diff --git a/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp b/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp deleted file mode 100644 index fcff006d0e028..0000000000000 --- a/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp +++ /dev/null @@ -1,151 +0,0 @@ -// RUN: %clang_cc1 -verify -fsyntax-only -std=c++20 -Wconversion %s - -void c8(char8_t); -void c16(char16_t); -void c32(char32_t); - -void test(char8_t u8, char16_t u16, char32_t u32) { - c8(u8); - c8(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' may lose precision and change the meaning of the represented code unit}} - c8(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' may lose precision and change the meaning of the represented code unit}} - - c16(u8); // expected-warning {{implicit 
conversion from 'char8_t' to 'char16_t' may change the meaning of the represented code unit}} - c16(u16); - c16(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' may lose precision and change the meaning of the represented code unit}} - - c32(u8); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' may change the meaning of the represented code unit}} - c32(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' may change the meaning of the represented code unit}} - c32(u32); - - - c8(char32_t(0x7f)); - c8(char32_t(0x80)); // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' changes the meaning of the code point ''}} - - c8(char16_t(0x7f)); - c8(char16_t(0x80)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point ''}} - c8(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code unit '<0xD800>'}} - c8(char16_t(0xE000)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point ''}} - - - c16(char32_t(0x7f)); - c16(char32_t(0x80)); - c16(char32_t(0xD7FF)); - c16(char32_t(0xD800)); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code unit '<0xD800>'}} - c16(char32_t(0xE000)); - c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code point '🐉'}} - - - c32(char8_t(0x7f)); - c32(char8_t(0x80)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit '<0x80>'}} - c32(char8_t(0xFF)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit '<0xFF>'}} - - - c32(char16_t(0x7f)); - c32(char16_t(0x80)); - - c32(char16_t(0xD7FF)); - c32(char16_t(0xD800)); // expected-warning {{implicit conversion from 
'char16_t' to 'char32_t' changes the meaning of the code unit '<0xD800>'}} - c32(char16_t(0xDFFF)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xDFFF>'}} - c32(char16_t(0xE000)); - c32(char16_t(u'☕')); - - (void)static_cast(char8_t(0x80)); //no warnings for explicit conversions. - - using Char8 = char8_t; - Char8 c81 = u16; // expected-warning {{implicit conversion from 'char16_t' to 'Char8' (aka 'char8_t') may lose precision and change the meaning of the represented code unit}} - - [[maybe_unused]] char c = u16; // expected-warning {{implicit conversion loses integer precision: 'char16_t' to 'char'}} - - // FIXME: We should apply the same logic to wchar - [[maybe_unused]] wchar_t wc = u16; - [[maybe_unused]] wchar_t wc2 = u8; -} - -void test_comp(char8_t u8, char16_t u16, char32_t u32) { - (void)(u8 == u8' '); - (void)(u8 == u' '); - (void)(u8 == U' '); - - (void)(u16 == u8' '); - (void)(u16 == U' '); - - (void)(u32 == u8' '); - (void)(u32 == u' '); - (void)(u32 == U' '); - - (void)(u8 == u'\u00FF'); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' may compare different code points}} - (void)(u8 == U'\u00FF'); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' may compare different code points}} - - (void)(u16 == u8'\xFF'); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char8_t' may compare different code points}} - (void)(u16 == u'\u00FF'); - (void)(u16 == U'\u00FF'); - (void)(u16 == U'\xD800'); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char32_t' may compare different code points}} - - (void)(u32 == u8'\xFF'); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char8_t' may compare different code points}} - (void)(u32 == u'\u00FF'); - (void)(u32 == u'\xD800'); // 
expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char16_t' may compare different code points}} - - (void)(char8_t(0x7f) == char8_t(0x7f)); - (void)(char8_t(0x7f) == char16_t(0x7f)); - (void)(char8_t(0x7f) == char32_t(0x7f)); - - (void)(char8_t(0x80) == char8_t(0x80)); - (void)(char8_t(0x80) == char16_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' compares unrelated code units '<0x80>' and '}} - (void)(char8_t(0x80) == char32_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' compares unrelated code units '<0x80>' and '}} - - (void)(char8_t(0x80) == char8_t(0x7f)); - (void)(char8_t(0x80) == char16_t(0x7f)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' compares unrelated code units '<0x80>' and ''}} - (void)(char8_t(0x80) == char32_t(0x7f)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' compares unrelated code units '<0x80>' and ''}} - - - (void)(char16_t(0x7f) < char8_t(0x7f)); - (void)(char16_t(0x7f) < char16_t(0x7f)); - (void)(char16_t(0x7f) < char32_t(0x7f)); - - (void)(char16_t(0x80) < char8_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char8_t' compares unrelated code units '' and '<0x80>'}} - (void)(char16_t(0x80) < char16_t(0x80)); - (void)(char16_t(0x80) < char32_t(0x80)); - - (void)(char16_t(0x80) == char8_t(0x7f)); - (void)(char16_t(0x80) < char16_t(0x7f)); - (void)(char16_t(0x80) < char32_t(0x7f)); - - - (void)(char32_t(0x7f) < char8_t(0x7f)); - (void)(char32_t(0x7f) < char16_t(0x7f)); - (void)(char32_t(0x7f) < char32_t(0x7f)); - - (void)(char32_t(0x80) < char8_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char8_t' compares unrelated code units '' and '<0x80>'}} - 
(void)(char32_t(0x80) < char16_t(0x80)); - (void)(char32_t(0x80) < char32_t(0x80)); - - (void)(char32_t(0x80) == char8_t(0x7f)); - (void)(char32_t(0x80) < char16_t(0x7f)); - (void)(char32_t(0x80) < char32_t(0x7f)); - - - (void)(char32_t(U'🐉') <= char16_t(0xD800)); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char16_t' compares unrelated code units '🐉' and '<0xD800>'}} - (void)(char32_t(U'🐉') <= char16_t(0xD7FF)); - - (void)(char16_t(0xD800) >= char32_t(U'🐉')); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char32_t' compares unrelated code units '<0xD800>' and '🐉'}} - (void)(char16_t(0xD7FF) >= char32_t(U'🐉')); -} - -void check_arithmetic(char8_t u8, char16_t u16, char32_t u32) { - - (void)(u8 + u8); - (void)(u16 += u16); - (void)(u32 & u32); - (void)(1 ? u16 : u16); - - (void)(u8 + u16); // expected-warning {{arithmetic between different Unicode character types 'char8_t' and 'char16_t'}} - (void)(u8 += u16); // expected-warning {{compound assignment of different Unicode character types 'char8_t' and 'char16_t'}} - (void)(u8 & u16); // expected-warning {{bitwise operation between different Unicode character types 'char8_t' and 'char16_t'}} - (void)(1 ? u8 : u16); // expected-warning {{conditional expression between different Unicode character types 'char8_t' and 'char16_t'}} - - - (void)(u16 * u32); // expected-warning {{arithmetic between different Unicode character types 'char16_t' and 'char32_t'}} - (void)(u16 -= u32); // expected-warning {{compound assignment of different Unicode character types 'char16_t' and 'char32_t'}} - (void)(u16 | u32); // expected-warning {{bitwise operation between different Unicode character types 'char16_t' and 'char32_t'}} - (void)(1 ? 
u32 : u16); // expected-warning {{conditional expression between different Unicode character types 'char32_t' and 'char16_t'}} -} diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py index 2b35bb5dcbdaf..f963b656b663c 100644 --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -70,8 +70,6 @@ llvm_config.use_clang() -config.substitutions.append(("%src_dir", config.clang_src_dir)) - config.substitutions.append(("%src_include_dir", config.clang_src_dir + "/include")) config.substitutions.append(("%target_triple", config.target_triple)) diff --git a/clang/tools/clang-refactor/ClangRefactor.cpp b/clang/tools/clang-refactor/ClangRefactor.cpp index a92b3f91beaed..968f0594085d4 100644 --- a/clang/tools/clang-refactor/ClangRefactor.cpp +++ b/clang/tools/clang-refactor/ClangRefactor.cpp @@ -160,8 +160,7 @@ SourceSelectionArgument::fromString(StringRef Value) { return std::make_unique(std::move(*Range)); llvm::errs() << "error: '-selection' option must be specified using " ":: or " - "::-: format, " - "where and are integers greater than zero.\n"; + "::-: format\n"; return nullptr; } diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 39493d718f0af..b7b2580d72a0e 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2053,20 +2053,8 @@ void NeonEmitter::createIntrinsic(const Record *R, auto &Entry = IntrinsicMap[Name]; for (auto &I : NewTypeSpecs) { - - // MFloat8 type is only available on AArch64. If encountered set ArchGuard - // correctly. 
- std::string NewArchGuard = ArchGuard; - if (Type(I.first, ".").isMFloat8()) { - if (NewArchGuard.empty()) { - NewArchGuard = "defined(__aarch64__)"; - } else if (NewArchGuard.find("defined(__aarch64__)") == - std::string::npos) { - NewArchGuard = "defined(__aarch64__) && (" + NewArchGuard + ")"; - } - } Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this, - NewArchGuard, TargetGuard, IsUnavailable, BigEndianSafe); + ArchGuard, TargetGuard, IsUnavailable, BigEndianSafe); Out.push_back(&Entry.back()); } diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index 86e19e08270d7..d346b0ec01b03 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -123,7 +123,7 @@ macro(set_output_name output name arch) else() if(ANDROID AND ${arch} STREQUAL "i386") set(${output} "${name}-i686${COMPILER_RT_OS_SUFFIX}") - elseif(NOT "${arch}" MATCHES "^arm64" AND "${arch}" MATCHES "^arm") + elseif("${arch}" MATCHES "^arm") if(COMPILER_RT_DEFAULT_TARGET_ONLY) set(triple "${COMPILER_RT_DEFAULT_TARGET_TRIPLE}") else() diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index 8c9c84ad64bc0..cbb43a5958d2f 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -59,7 +59,7 @@ else() endif() set(AMDGPU amdgcn) -set(ARM64 aarch64 arm64ec) +set(ARM64 aarch64) set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main) set(AVR avr) set(HEXAGON hexagon) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index d9b7800a95565..5efc4ab0e85bc 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -668,7 +668,6 @@ set(armv7k_SOURCES ${arm_SOURCES}) set(arm64_SOURCES ${aarch64_SOURCES}) set(arm64e_SOURCES ${aarch64_SOURCES}) 
set(arm64_32_SOURCES ${aarch64_SOURCES}) -set(arm64ec_SOURCES ${aarch64_SOURCES}) # macho_embedded archs set(armv6m_SOURCES ${thumb1_SOURCES}) diff --git a/compiler-rt/lib/builtins/aarch64/chkstk.S b/compiler-rt/lib/builtins/aarch64/chkstk.S index 563c09ecbc390..01f90366f0302 100644 --- a/compiler-rt/lib/builtins/aarch64/chkstk.S +++ b/compiler-rt/lib/builtins/aarch64/chkstk.S @@ -15,18 +15,12 @@ // bl __chkstk // sub sp, sp, x15, lsl #4 -#if defined(__aarch64__) || defined(__arm64ec__) - -#ifdef __arm64ec__ -#define CHKSTK_FUNC __chkstk_arm64ec -#else -#define CHKSTK_FUNC __chkstk -#endif +#ifdef __aarch64__ #define PAGE_SIZE 4096 .p2align 2 -DEFINE_COMPILERRT_FUNCTION(CHKSTK_FUNC) +DEFINE_COMPILERRT_FUNCTION(__chkstk) lsl x16, x15, #4 mov x17, sp 1: @@ -36,6 +30,6 @@ DEFINE_COMPILERRT_FUNCTION(CHKSTK_FUNC) b.gt 1b ret -END_COMPILERRT_FUNCTION(CHKSTK_FUNC) +END_COMPILERRT_FUNCTION(__chkstk) -#endif // defined(__aarch64__) || defined(__arm64ec__) +#endif // __aarch64__ diff --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S index d7c1db7243ef8..1fe18f4a46819 100644 --- a/compiler-rt/lib/builtins/aarch64/lse.S +++ b/compiler-rt/lib/builtins/aarch64/lse.S @@ -20,7 +20,7 @@ // Routines may modify temporary registers tmp0, tmp1, tmp2, // return value x0 and the flags only. 
-#if defined(__aarch64__) || defined(__arm64ec__) +#ifdef __aarch64__ #ifdef HAS_ASM_LSE .arch armv8-a+lse @@ -267,4 +267,4 @@ NO_EXEC_STACK_DIRECTIVE // GNU property note for BTI and PAC GNU_PROPERTY_BTI_PAC -#endif // defined(__aarch64__) || defined(__arm64ec__) +#endif // __aarch64__ diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S index 73b1ab2c76aa3..e736829967c0c 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S +++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S @@ -235,7 +235,7 @@ END_COMPILERRT_FUNCTION(__arm_sc_memcpy) DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy) // This version uses FP registers. Use this only on targets with them -#if (defined(__aarch64__) && __ARM_FP != 0) || defined(__arm64ec__) +#if defined(__aarch64__) && __ARM_FP != 0 // // __arm_sc_memset // diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index eb58452d624ee..441eabd1fe922 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -59,14 +59,13 @@ uintptr_t GetCurrentProcess(void); // specified range. 
void __clear_cache(void *start, void *end) { -#if defined(_WIN32) && \ - (defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)) - FlushInstructionCache(GetCurrentProcess(), start, end - start); -#elif __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64) +#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64) // Intel processors have a unified instruction and data cache // so there is nothing to do #elif defined(__s390__) // no-op +#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__)) + FlushInstructionCache(GetCurrentProcess(), start, end - start); #elif defined(__arm__) && !defined(__APPLE__) #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) struct arm_sync_icache_args arg; diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c index be002dd71992a..4082fd62ea11a 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.c +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c @@ -14,8 +14,7 @@ #include "aarch64.h" -#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) && \ - !defined(__arm64ec__) && !defined(_M_ARM64EC) +#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) #error This file is intended only for aarch64-based targets #endif diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.h b/compiler-rt/lib/builtins/cpu_model/aarch64.h index 3d9b3aa0e594e..2a734b02b7c90 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64.h +++ b/compiler-rt/lib/builtins/cpu_model/aarch64.h @@ -8,8 +8,7 @@ #include "cpu_model.h" -#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) && \ - !defined(__arm64ec__) && !defined(_M_ARM64EC) +#if !defined(__aarch64__) && !defined(__arm64__) && !defined(_M_ARM64) #error This file is intended only for aarch64-based targets #endif diff --git a/compiler-rt/lib/builtins/fp_compare_impl.inc b/compiler-rt/lib/builtins/fp_compare_impl.inc index f883338c471d3..a9a4f6fbf5dfe 
100644 --- a/compiler-rt/lib/builtins/fp_compare_impl.inc +++ b/compiler-rt/lib/builtins/fp_compare_impl.inc @@ -12,7 +12,7 @@ // functions. We need to ensure that the return value is sign-extended in the // same way as GCC expects (since otherwise GCC-generated __builtin_isinf // returns true for finite 128-bit floating-point numbers). -#if defined(__aarch64__) || defined(__arm64ec__) +#ifdef __aarch64__ // AArch64 GCC overrides libgcc_cmp_return to use int instead of long. typedef int CMP_RESULT; #elif __SIZEOF_POINTER__ == 8 && __SIZEOF_LONG__ == 4 diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h index 95b24aac1ff1d..fae58497a8f80 100644 --- a/compiler-rt/lib/builtins/fp_lib.h +++ b/compiler-rt/lib/builtins/fp_lib.h @@ -359,7 +359,7 @@ static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) { return __compiler_rt_scalbnX(x, y); } static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) { -#if defined(__aarch64__) || defined(__arm64ec__) +#if defined(__aarch64__) // Use __builtin_fmax which turns into an fmaxnm instruction on AArch64. 
return __builtin_fmax(x, y); #else diff --git a/compiler-rt/lib/builtins/udivmodti4.c b/compiler-rt/lib/builtins/udivmodti4.c index 6ce213fd5f2a4..55def37c9e1fe 100644 --- a/compiler-rt/lib/builtins/udivmodti4.c +++ b/compiler-rt/lib/builtins/udivmodti4.c @@ -83,7 +83,7 @@ static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v, static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v, du_int *r) { -#if defined(__x86_64__) && !defined(__arm64ec__) +#if defined(__x86_64__) du_int result; __asm__("divq %[v]" : "=a"(result), "=d"(*r) diff --git a/compiler-rt/test/builtins/Unit/enable_execute_stack_test.c b/compiler-rt/test/builtins/Unit/enable_execute_stack_test.c index b3cb4df005ca5..eb1fa97797ac8 100644 --- a/compiler-rt/test/builtins/Unit/enable_execute_stack_test.c +++ b/compiler-rt/test/builtins/Unit/enable_execute_stack_test.c @@ -10,22 +10,9 @@ extern void __enable_execute_stack(void* addr); typedef int (*pfunc)(void); -#ifdef __arm64ec__ -// On ARM64EC, we need the x86_64 version of this function, but the compiler -// would normally generate the AArch64 variant, so we hardcode it here. -static char func1[] = { - 0xb8, 0x01, 0x00, 0x00, 0x00, // movl $0x1, %eax - 0xc3 // retq -}; -static char func2[] = { - 0xb8, 0x02, 0x00, 0x00, 0x00, // movl $0x2, %eax - 0xc3 // retq -}; -#else // Make these static to avoid ILT jumps for incremental linking on Windows. 
static int func1() { return 1; } static int func2() { return 2; } -#endif void *__attribute__((noinline)) memcpy_f(void *dst, const void *src, size_t n) { diff --git a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c index 982f3a4629dbd..d9f02bf472b5a 100644 --- a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c +++ b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c @@ -4,7 +4,7 @@ #include -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if _ARCH_PPC || __aarch64__ #include "int_lib.h" @@ -35,7 +35,7 @@ char assumption_3[sizeof(long double)*CHAR_BIT == 128] = {0}; int main() { -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if _ARCH_PPC || __aarch64__ if (test__fixunstfdi(0.0, 0)) return 1; diff --git a/compiler-rt/test/builtins/Unit/multc3_test.c b/compiler-rt/test/builtins/Unit/multc3_test.c index e9c99a72be35e..06f55a68d991a 100644 --- a/compiler-rt/test/builtins/Unit/multc3_test.c +++ b/compiler-rt/test/builtins/Unit/multc3_test.c @@ -4,7 +4,7 @@ #include -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if _ARCH_PPC || __aarch64__ #include "int_lib.h" #include @@ -348,7 +348,7 @@ long double x[][2] = int main() { -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if _ARCH_PPC || __aarch64__ const unsigned N = sizeof(x) / sizeof(x[0]); unsigned i, j; for (i = 0; i < N; ++i) diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp index 9be75da9520e3..4a813cd489022 100644 --- a/flang-rt/lib/runtime/assign.cpp +++ b/flang-rt/lib/runtime/assign.cpp @@ -79,18 +79,15 @@ static RT_API_ATTRS int AllocateAssignmentLHS( to.raw().elem_len = from.ElementBytes(); } const typeInfo::DerivedType *derived{nullptr}; - DescriptorAddendum *toAddendum{to.Addendum()}; if (const DescriptorAddendum * fromAddendum{from.Addendum()}) { derived = fromAddendum->derivedType(); - if (toAddendum) { + if (DescriptorAddendum * toAddendum{to.Addendum()}) { toAddendum->set_derivedType(derived); std::size_t lenParms{derived ? 
derived->LenParameters() : 0}; for (std::size_t j{0}; j < lenParms; ++j) { toAddendum->SetLenParameterValue(j, fromAddendum->LenParameterValue(j)); } } - } else if (toAddendum) { - toAddendum->set_derivedType(nullptr); } // subtle: leave bounds in place when "from" is scalar (10.2.1.3(3)) int rank{from.rank()}; diff --git a/flang/docs/ModFiles.md b/flang/docs/ModFiles.md index fc05c2677fc26..dd0ade5cebbfc 100644 --- a/flang/docs/ModFiles.md +++ b/flang/docs/ModFiles.md @@ -171,14 +171,3 @@ modules of dependent libraries need not also be packaged with the library. When the compiler reads a hermetic module file, the copies of the dependent modules are read into their own scope, and will not conflict with other modules of the same name that client code might `USE`. - -One can use the `-fhermetic-module-files` option when building the top-level -module files of a library for which not all of the implementation modules -will (or can) be shipped. - -It is also possible to convert a default module file to a hermetic one after -the fact. -Since module files are Fortran source, simply copy the module file to a new -temporary free form Fortran source file and recompile it (`-fsyntax-only`) -with the `-fhermetic-module-files` flag, and that will regenerate the module -file in place with all of its dependent modules included. 
diff --git a/flang/include/flang/Parser/preprocessor.h b/flang/include/flang/Parser/preprocessor.h index 15810a34ee6a5..86528a7e68def 100644 --- a/flang/include/flang/Parser/preprocessor.h +++ b/flang/include/flang/Parser/preprocessor.h @@ -116,7 +116,6 @@ class Preprocessor { bool IsIfPredicateTrue(const TokenSequence &expr, std::size_t first, std::size_t exprTokens, Prescanner &); void LineDirective(const TokenSequence &, std::size_t, Prescanner &); - TokenSequence TokenizeMacroBody(const std::string &); AllSources &allSources_; std::list names_; diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 4cded64d170cd..97c1e30631840 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -600,7 +600,6 @@ class TypeParamDetails { public: TypeParamDetails() = default; TypeParamDetails(const TypeParamDetails &) = default; - TypeParamDetails &operator=(const TypeParamDetails &) = default; std::optional attr() const { return attr_; } TypeParamDetails &set_attr(common::TypeParamAttr); MaybeIntExpr &init() { return init_; } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 02454543d0a60..f4876256a378f 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1407,7 +1407,8 @@ bool ClauseProcessor::processUseDeviceAddr( const parser::CharBlock &source) { mlir::Location location = converter.genLocation(source); llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; processMapObjects(stmtCtx, location, clause.v, mapTypeBits, parentMemberIndices, result.useDeviceAddrVars, useDeviceSyms); @@ -1428,7 +1429,8 @@ bool ClauseProcessor::processUseDevicePtr( const parser::CharBlock &source) { mlir::Location location = 
converter.genLocation(source); llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; processMapObjects(stmtCtx, location, clause.v, mapTypeBits, parentMemberIndices, result.useDevicePtrVars, useDeviceSyms); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 173dceb07b193..3f4cfb8c11a9d 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -398,16 +398,14 @@ mlir::Value createParentSymAndGenIntermediateMaps( interimBounds, treatIndexAsSection); } - // Remove all map-type bits (e.g. TO, FROM, etc.) from the intermediate - // allocatable maps, as we simply wish to alloc or release them. It may - // be safer to just pass OMP_MAP_NONE as the map type, but we may still + // Remove all map TO, FROM and TOFROM bits, from the intermediate + // allocatable maps, we simply wish to alloc or release them. It may be + // safer to just pass OMP_MAP_NONE as the map type, but we may still // need some of the other map types the mapped member utilises, so for // now it's good to keep an eye on this. llvm::omp::OpenMPOffloadMappingFlags interimMapType = mapTypeBits; interimMapType &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; interimMapType &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; - interimMapType &= - ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; // Create a map for the intermediate member and insert it and it's // indices into the parentMemberIndices list to track it. 
diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index a5de14d864762..6e8e3aee19b09 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -301,82 +301,8 @@ void Preprocessor::DefineStandardMacros() { Define("__TIMESTAMP__"s, "__TIMESTAMP__"s); } -static const std::string idChars{ - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"s}; - -static std::optional> TokenizeMacroNameAndArgs( - const std::string &str) { - // TODO: variadic macros on the command line (?) - std::vector names; - for (std::string::size_type at{0};;) { - auto nameStart{str.find_first_not_of(" "s, at)}; - if (nameStart == str.npos) { - return std::nullopt; - } - auto nameEnd{str.find_first_not_of(idChars, nameStart)}; - if (nameEnd == str.npos) { - return std::nullopt; - } - auto punc{str.find_first_not_of(" "s, nameEnd)}; - if (punc == str.npos) { - return std::nullopt; - } - if ((at == 0 && str[punc] != '(') || - (at > 0 && str[punc] != ',' && str[punc] != ')')) { - return std::nullopt; - } - names.push_back(str.substr(nameStart, nameEnd - nameStart)); - at = punc + 1; - if (str[punc] == ')') { - if (str.find_first_not_of(" "s, at) != str.npos) { - return std::nullopt; - } else { - return names; - } - } - } -} - -TokenSequence Preprocessor::TokenizeMacroBody(const std::string &str) { - TokenSequence tokens; - Provenance provenance{allSources_.AddCompilerInsertion(str).start()}; - auto end{str.size()}; - for (std::string::size_type at{0}; at < end;) { - // Alternate between tokens that are identifiers (and therefore subject - // to argument replacement) and those that are not. 
- auto start{str.find_first_of(idChars, at)}; - if (start == str.npos) { - tokens.Put(str.substr(at), provenance + at); - break; - } else if (start > at) { - tokens.Put(str.substr(at, start - at), provenance + at); - } - at = str.find_first_not_of(idChars, start + 1); - if (at == str.npos) { - tokens.Put(str.substr(start), provenance + start); - break; - } else { - tokens.Put(str.substr(start, at - start), provenance + start); - } - } - return tokens; -} - void Preprocessor::Define(const std::string ¯o, const std::string &value) { - if (auto lhs{TokenizeMacroNameAndArgs(macro)}) { - // function-like macro - CharBlock macroName{SaveTokenAsName(lhs->front())}; - auto iter{lhs->begin()}; - ++iter; - std::vector argNames{iter, lhs->end()}; - auto rhs{TokenizeMacroBody(value)}; - definitions_.emplace(std::make_pair(macroName, - Definition{ - argNames, rhs, 0, rhs.SizeInTokens(), /*isVariadic=*/false})); - } else { // keyword macro - definitions_.emplace( - SaveTokenAsName(macro), Definition{value, allSources_}); - } + definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_}); } void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); } diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index b3ad608ee6744..c35492097cfbc 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -4904,19 +4904,6 @@ std::optional ArgumentAnalyzer::AnalyzeExpr( "TYPE(*) dummy argument may only be used as an actual argument"_err_en_US); } else if (MaybeExpr argExpr{AnalyzeExprOrWholeAssumedSizeArray(expr)}) { if (isProcedureCall_ || !IsProcedureDesignator(*argExpr)) { - // Pad Hollerith actual argument with spaces up to a multiple of 8 - // bytes, in case the data are interpreted as double precision - // (or a smaller numeric type) by legacy code. 
- if (auto hollerith{UnwrapExpr>(*argExpr)}; - hollerith && hollerith->wasHollerith()) { - std::string bytes{hollerith->values()}; - while ((bytes.size() % 8) != 0) { - bytes += ' '; - } - Constant c{std::move(bytes)}; - c.set_wasHollerith(true); - argExpr = AsGenericExpr(std::move(c)); - } ActualArgument arg{std::move(*argExpr)}; SetArgSourceLocation(arg, expr.source); return std::move(arg); diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index b13921f822b4d..8019ecf7f6a05 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -423,15 +423,14 @@ func.func @_QPopenmp_target_data_region() { func.func @_QPomp_target_data_empty() { %0 = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = "_QFomp_target_data_emptyEa"} - %1 = omp.map.info var_ptr(%0 : !fir.ref>, !fir.ref>) map_clauses(return_param) capture(ByRef) -> !fir.ref> {name = ""} - omp.target_data use_device_addr(%1 -> %arg0 : !fir.ref>) { + omp.target_data use_device_addr(%0 -> %arg0 : !fir.ref>) { omp.terminator } return } // CHECK-LABEL: llvm.func @_QPomp_target_data_empty -// CHECK: omp.target_data use_device_addr(%{{.*}} -> %{{.*}} : !llvm.ptr) { +// CHECK: omp.target_data use_device_addr(%1 -> %{{.*}} : !llvm.ptr) { // CHECK: } // ----- diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index f04aacc63fc2b..4815e6564fc7e 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -544,7 +544,7 @@ subroutine omp_target_device_addr !CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) !CHECK: %[[MAP_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr({{.*}} : !fir.llvm_ptr>) -> !fir.llvm_ptr> {name = ""} !CHECK: %[[MAP:.*]] = 
omp.map.info var_ptr({{.*}} : !fir.ref>>, !fir.box>) map_clauses(to) capture(ByRef) members(%[[MAP_MEMBERS]] : [0] : !fir.llvm_ptr>) -> !fir.ref>> {name = "a"} - !CHECK: %[[DEV_ADDR_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref>>, i32) map_clauses(return_param) capture(ByRef) var_ptr_ptr({{.*}} : !fir.llvm_ptr>) -> !fir.llvm_ptr> {name = ""} + !CHECK: %[[DEV_ADDR_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr({{.*}} : !fir.llvm_ptr>) -> !fir.llvm_ptr> {name = ""} !CHECK: %[[DEV_ADDR:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref>>, !fir.box>) map_clauses(to) capture(ByRef) members(%[[DEV_ADDR_MEMBERS]] : [0] : !fir.llvm_ptr>) -> !fir.ref>> {name = "a"} !CHECK: omp.target_data map_entries(%[[MAP]], %[[MAP_MEMBERS]] : {{.*}}) use_device_addr(%[[DEV_ADDR]] -> %[[ARG_0:.*]], %[[DEV_ADDR_MEMBERS]] -> %[[ARG_1:.*]] : !fir.ref>>, !fir.llvm_ptr>) { !$omp target data map(tofrom: a) use_device_addr(a) diff --git a/flang/test/Preprocessing/func-on-command-line.F90 b/flang/test/Preprocessing/func-on-command-line.F90 deleted file mode 100644 index cf844e021b371..0000000000000 --- a/flang/test/Preprocessing/func-on-command-line.F90 +++ /dev/null @@ -1,4 +0,0 @@ -! RUN: %flang_fc1 -fdebug-unparse "-Dfoo(a,b)=bar(a+b)" %s | FileCheck %s -! CHECK: CALL bar(3_4) -call foo(1,2) -end diff --git a/flang/test/Semantics/pad-hollerith-arg.f b/flang/test/Semantics/pad-hollerith-arg.f deleted file mode 100644 index 75678441ea45f..0000000000000 --- a/flang/test/Semantics/pad-hollerith-arg.f +++ /dev/null @@ -1,5 +0,0 @@ -! RUN: %flang_fc1 -fdebug-unparse %s | FileCheck %s -! Ensure that Hollerith actual arguments are blank padded. -! 
CHECK: CALL foo("abc ") - call foo(3habc) - end diff --git a/libcxx/docs/CodingGuidelines.rst b/libcxx/docs/CodingGuidelines.rst index ff312d16cf7bb..4a601dffa87ca 100644 --- a/libcxx/docs/CodingGuidelines.rst +++ b/libcxx/docs/CodingGuidelines.rst @@ -124,8 +124,8 @@ Write SFINAE with ``requires`` clauses in C++20-only code subsume other concepts. This means that overloads based on traits can be written without negating more general cases. They also show intent better. -Write ``enable_if`` as ``enable_if_t = 0`` -========================================================== +Write ``enable_if`` as ``enable_if_t = 0`` +========================================================= The form ``enable_if_t = 0`` is the only one that works in every language mode and for overload sets using the same template arguments otherwise. If the code must work in C++11 or C++03, the libc++-internal alias diff --git a/libcxx/docs/DesignDocs/FileTimeType.rst b/libcxx/docs/DesignDocs/FileTimeType.rst index 946c9e515fb9b..f775fd840e236 100644 --- a/libcxx/docs/DesignDocs/FileTimeType.rst +++ b/libcxx/docs/DesignDocs/FileTimeType.rst @@ -33,7 +33,7 @@ which is defined as follows: }; To represent the range and resolution of ``timespec``, we need to (A) have -nanosecond resolution, and (B) use more than 64 bits (assuming a 64-bit ``time_t``). +nanosecond resolution, and (B) use more than 64 bits (assuming a 64 bit ``time_t``). As the standard requires us to use the ``chrono`` interface, we have to define our own filesystem clock which specifies the period and representation of @@ -207,7 +207,7 @@ code in some way: // Overflow during creation bug. file_time_type timespec_to_file_time_type(struct timespec ts) { - // woops! chrono::seconds and chrono::nanoseconds use a 64-bit representation + // woops! chrono::seconds and chrono::nanoseconds use a 64 bit representation // this may overflow before it's converted to a file_time_type. 
auto dur = seconds(ts.tv_sec) + nanoseconds(ts.tv_nsec); return file_time_type(dur); @@ -272,7 +272,7 @@ look like. The first thing to notice is that we can't construct ``fs_timespec_rep`` like a ``timespec`` by passing ``{secs, nsecs}``. Instead we're limited to -constructing it from a single 64-bit integer. +constructing it from a single 64 bit integer. We also can't allow the user to inspect the ``tv_sec`` or ``tv_nsec`` values directly. A ``chrono::duration`` represents its value as a tick period and a @@ -350,12 +350,12 @@ Though the above example may appear silly, I think it follows from the incorrect notion that using a ``timespec`` rep in chrono actually makes it act as if it were an actual ``timespec``. -Interactions with 32-bit ``time_t`` +Interactions with 32 bit ``time_t`` ----------------------------------- Up until now we've only be considering cases where ``time_t`` is 64 bits, but what -about 32-bit systems/builds where ``time_t`` is 32 bits? (this is the common case -for 32-bit builds). +about 32 bit systems/builds where ``time_t`` is 32 bits? (this is the common case +for 32 bit builds). When ``time_t`` is 32 bits, we can implement ``file_time_type`` simply using 64-bit ``long long``. There is no need to get either ``__int128_t`` or ``timespec`` emulation @@ -431,11 +431,11 @@ Pros: Cons: -* It isn't always available (but on 64-bit machines, it normally is). +* It isn't always available (but on 64 bit machines, it normally is). * It causes ``file_time_type`` to have a larger range than ``timespec``. * It doesn't always act the same as other builtin integer types. For example with ``cout`` or ``to_string``. -* Allows implicit truncation to 64-bit integers. +* Allows implicit truncation to 64 bit integers. * It can be implicitly converted to a builtin integer type by the user, truncating its value. 
diff --git a/libcxx/docs/TestingLibcxx.rst b/libcxx/docs/TestingLibcxx.rst index 3320f7d2e7691..9c2ac9edb6777 100644 --- a/libcxx/docs/TestingLibcxx.rst +++ b/libcxx/docs/TestingLibcxx.rst @@ -291,7 +291,7 @@ tests using exceptions. The code to write a test manually would be: .. code-block:: cpp - void test_exception([[maybe_unused]] int arg) { + void test_exception([[maybe_unused]] int arg) { #ifndef TEST_HAS_NO_EXCEPTIONS // do nothing when tests are disabled try { foo(arg); @@ -308,7 +308,7 @@ The same test using a macro: .. code-block:: cpp - void test_exception([[maybe_unused]] int arg) { + void test_exception([[maybe_unused]] int arg) { TEST_VALIDATE_EXCEPTION(bar, [](const bar& e) { LIBCPP_ASSERT(e.what() == what); diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h index 873265bc17c24..74fec9f2761e0 100644 --- a/libcxx/include/__format/format_functions.h +++ b/libcxx/include/__format/format_functions.h @@ -11,8 +11,6 @@ #define _LIBCPP___FORMAT_FORMAT_FUNCTIONS #include <__algorithm/clamp.h> -#include <__algorithm/ranges_find_first_of.h> -#include <__chrono/statically_widen.h> #include <__concepts/convertible_to.h> #include <__concepts/same_as.h> #include <__config> @@ -38,7 +36,6 @@ #include <__iterator/iterator_traits.h> // iter_value_t #include <__variant/monostate.h> #include -#include #include #include @@ -450,47 +447,10 @@ format_to(_OutIt __out_it, wformat_string<_Args...> __fmt, _Args&&... __args) { } # endif -// Try constant folding the format string instead of going through the whole formatting machinery. If there is no -// constant folding no extra code should be emitted (with optimizations enabled) and the function returns nullopt. When -// constant folding is successful, the formatting is performed and the resulting string is returned.
-namespace __format { -template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI optional> __try_constant_folding( - basic_string_view<_CharT> __fmt, - basic_format_args>, _CharT>> __args) { - // Fold strings not containing '{' or '}' to just return the string - if (bool __is_identity = [&] [[__gnu__::__pure__]] // Make sure the compiler knows this call can be eliminated - { return std::ranges::find_first_of(__fmt, array{'{', '}'}) == __fmt.end(); }(); - __builtin_constant_p(__is_identity) && __is_identity) - return basic_string<_CharT>{__fmt}; - - // Fold '{}' to the appropriate conversion function - if (auto __only_first_arg = __fmt == _LIBCPP_STATICALLY_WIDEN(_CharT, "{}"); - __builtin_constant_p(__only_first_arg) && __only_first_arg) { - if (auto __arg = __args.get(0); __builtin_constant_p(__arg.__type_)) { - return std::__visit_format_arg( - [](_Tp&& __argument) -> optional> { - if constexpr (is_same_v, basic_string_view<_CharT>>) { - return basic_string<_CharT>{__argument}; - } else { - return nullopt; - } - }, - __arg); - } - } - - return nullopt; -} -} // namespace __format - // TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup // fires too eagerly, see http://llvm.org/PR61563. 
template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(string_view __fmt, format_args __args) { - auto __result = __format::__try_constant_folding(__fmt, __args); - if (__result.has_value()) - return *std::move(__result); __format::__allocating_buffer __buffer; std::vformat_to(__buffer.__make_output_iterator(), __fmt, __args); return string{__buffer.__view()}; @@ -502,9 +462,6 @@ template template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(wstring_view __fmt, wformat_args __args) { - auto __result = __format::__try_constant_folding(__fmt, __args); - if (__result.has_value()) - return *std::move(__result); __format::__allocating_buffer __buffer; std::vformat_to(__buffer.__make_output_iterator(), __fmt, __args); return wstring{__buffer.__view()}; diff --git a/libcxx/include/__fwd/pair.h b/libcxx/include/__fwd/pair.h index cf07eabab6903..ea81a81ef8e11 100644 --- a/libcxx/include/__fwd/pair.h +++ b/libcxx/include/__fwd/pair.h @@ -22,12 +22,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD template struct pair; -template -inline const bool __is_pair_v = false; - -template -inline const bool __is_pair_v > = true; - template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename tuple_element<_Ip, pair<_T1, _T2> >::type& get(pair<_T1, _T2>&) _NOEXCEPT; diff --git a/libcxx/include/__memory/uses_allocator_construction.h b/libcxx/include/__memory/uses_allocator_construction.h index 49ddf99d9cc95..955879ffc5845 100644 --- a/libcxx/include/__memory/uses_allocator_construction.h +++ b/libcxx/include/__memory/uses_allocator_construction.h @@ -14,6 +14,7 @@ #include <__memory/uses_allocator.h> #include <__tuple/tuple_like_no_subrange.h> #include <__type_traits/enable_if.h> +#include <__type_traits/is_same.h> #include <__type_traits/remove_cv.h> #include <__utility/declval.h> #include <__utility/pair.h> @@ -30,8 +31,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 +template +inline constexpr bool __is_std_pair 
= false; + +template +inline constexpr bool __is_std_pair> = true; + template -inline constexpr bool __is_cv_std_pair = __is_pair_v>; +inline constexpr bool __is_cv_std_pair = __is_std_pair>; template struct __uses_allocator_construction_args; diff --git a/libcxx/include/__node_handle b/libcxx/include/__node_handle index 5c559c657ef50..08c4ffa5ff17b 100644 --- a/libcxx/include/__node_handle +++ b/libcxx/include/__node_handle @@ -62,7 +62,6 @@ public: #include <__config> #include <__memory/allocator_traits.h> #include <__memory/pointer_traits.h> -#include <__type_traits/is_specialization.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -174,40 +173,17 @@ struct __set_node_handle_specifics { _LIBCPP_HIDE_FROM_ABI value_type& value() const { return static_cast<_Derived const*>(this)->__ptr_->__get_value(); } }; -template -struct __hash_value_type; - template struct __map_node_handle_specifics { - template - struct __get_type { - using key_type = __remove_const_t; - using mapped_type = typename _Tp::second_type; - }; - - template - struct __get_type<__hash_value_type<_Key, _Mapped> > { - using key_type = _Key; - using mapped_type = _Mapped; - }; - - using key_type = typename __get_type::key_type; - using mapped_type = typename __get_type::mapped_type; + typedef typename _NodeType::__node_value_type::key_type key_type; + typedef typename _NodeType::__node_value_type::mapped_type mapped_type; _LIBCPP_HIDE_FROM_ABI key_type& key() const { - if constexpr (__is_specialization_v) { - return static_cast<_Derived const*>(this)->__ptr_->__get_value().__ref().first; - } else { - return const_cast(static_cast<_Derived const*>(this)->__ptr_->__get_value().first); - } + return static_cast<_Derived const*>(this)->__ptr_->__get_value().__ref().first; } _LIBCPP_HIDE_FROM_ABI mapped_type& mapped() const { - if constexpr (__is_specialization_v) { - return static_cast<_Derived const*>(this)->__ptr_->__get_value().__ref().second; - } else { - return static_cast<_Derived 
const*>(this)->__ptr_->__get_value().second; - } + return static_cast<_Derived const*>(this)->__ptr_->__get_value().__ref().second; } }; diff --git a/libcxx/include/__tree b/libcxx/include/__tree index 1903533898481..bbf7c71962e93 100644 --- a/libcxx/include/__tree +++ b/libcxx/include/__tree @@ -14,7 +14,6 @@ #include <__assert> #include <__config> #include <__fwd/map.h> -#include <__fwd/pair.h> #include <__fwd/set.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> @@ -26,7 +25,6 @@ #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> #include <__type_traits/can_extract_key.h> -#include <__type_traits/copy_cvref.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_const.h> @@ -507,24 +505,48 @@ struct __is_tree_value_type<_One> : __is_tree_value_type_imp<__remove_cvref_t<_O template struct __tree_key_value_types { typedef _Tp key_type; + typedef _Tp __node_value_type; typedef _Tp __container_value_type; static const bool __is_map = false; _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(_Tp const& __v) { return __v; } + _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(__node_value_type const& __v) { return __v; } + _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__node_value_type& __n) { return std::addressof(__n); } + _LIBCPP_HIDE_FROM_ABI static __container_value_type&& __move(__node_value_type& __v) { return std::move(__v); } }; template struct __tree_key_value_types<__value_type<_Key, _Tp> > { typedef _Key key_type; typedef _Tp mapped_type; + typedef __value_type<_Key, _Tp> __node_value_type; typedef pair __container_value_type; typedef __container_value_type __map_value_type; static const bool __is_map = true; + _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(__node_value_type const& __t) { + return __t.__get_value().first; + } + template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI static key_type const& __get_key(_Up& __t) 
{ return __t.first; } + + _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(__node_value_type const& __t) { + return __t.__get_value(); + } + + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI static __container_value_type const& __get_value(_Up& __t) { + return __t; + } + + _LIBCPP_HIDE_FROM_ABI static __container_value_type* __get_ptr(__node_value_type& __n) { + return std::addressof(__n.__get_value()); + } + + _LIBCPP_HIDE_FROM_ABI static pair __move(__node_value_type& __v) { return __v.__move(); } }; template @@ -565,19 +587,6 @@ struct __tree_map_pointer_types<_Tp, _AllocPtr, _KVTypes, true> { typedef __rebind_pointer_t<_AllocPtr, const _Mv> __const_map_value_type_pointer; }; -template -struct __get_node_value_type { - using type _LIBCPP_NODEBUG = _Tp; -}; - -template -struct __get_node_value_type<__value_type<_Key, _ValueT> > { - using type _LIBCPP_NODEBUG = pair; -}; - -template -using __get_node_value_type_t _LIBCPP_NODEBUG = typename __get_node_value_type<_Tp>::type; - template ::element_type> struct __tree_node_types; @@ -590,7 +599,7 @@ public: typedef typename pointer_traits<_NodePtr>::element_type __node_type; typedef _NodePtr __node_pointer; - using __node_value_type _LIBCPP_NODEBUG = __get_node_value_type_t<_Tp>; + typedef _Tp __node_value_type; typedef __rebind_pointer_t<_VoidPtr, __node_value_type> __node_value_type_pointer; typedef __rebind_pointer_t<_VoidPtr, const __node_value_type> __const_node_value_type_pointer; @@ -641,11 +650,11 @@ public: template class _LIBCPP_STANDALONE_DEBUG __tree_node : public __tree_node_base<_VoidPtr> { public: - using __node_value_type _LIBCPP_NODEBUG = __get_node_value_type_t<_Tp>; + typedef _Tp __node_value_type; __node_value_type __value_; - _LIBCPP_HIDE_FROM_ABI __node_value_type& __get_value() { return __value_; } + _LIBCPP_HIDE_FROM_ABI _Tp& __get_value() { return __value_; } ~__tree_node() = delete; __tree_node(__tree_node const&) = delete; @@ -676,7 +685,7 @@ public: 
_LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT { if (__value_constructed) - __alloc_traits::destroy(__na_, std::addressof(__p->__value_)); + __alloc_traits::destroy(__na_, _NodeTypes::__get_ptr(__p->__value_)); if (__p) __alloc_traits::deallocate(__na_, __p, 1); } @@ -706,7 +715,7 @@ class __tree_iterator { public: typedef bidirectional_iterator_tag iterator_category; - using value_type = __get_node_value_type_t<_Tp>; + typedef _Tp value_type; typedef _DiffType difference_type; typedef value_type& reference; typedef typename _NodeTypes::__node_value_type_pointer pointer; @@ -780,7 +789,7 @@ class __tree_const_iterator { public: typedef bidirectional_iterator_tag iterator_category; - using value_type = __get_node_value_type_t<_Tp>; + typedef _Tp value_type; typedef _DiffType difference_type; typedef const value_type& reference; typedef typename _NodeTypes::__const_node_value_type_pointer pointer; @@ -793,7 +802,7 @@ public: } private: - typedef __tree_iterator<_Tp, __node_pointer, difference_type> __non_const_iterator; + typedef __tree_iterator __non_const_iterator; public: _LIBCPP_HIDE_FROM_ABI __tree_const_iterator(__non_const_iterator __p) _NOEXCEPT : __ptr_(__p.__ptr_) {} @@ -1098,18 +1107,6 @@ public: return __emplace_hint_unique(__p, std::forward<_Vp>(__v)); } - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void - __insert_unique_from_orphaned_node(const_iterator __p, __get_node_value_type_t<_Tp>&& __value) { - using __key_type = typename _NodeTypes::key_type; - __emplace_hint_unique(__p, const_cast<__key_type&&>(__value.first), std::move(__value.second)); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void __insert_unique_from_orphaned_node(const_iterator __p, _Tp&& __value) { - __emplace_hint_unique(__p, std::move(__value)); - } - _LIBCPP_HIDE_FROM_ABI iterator __insert_multi(__container_value_type&& __v) { return __emplace_multi(std::move(__v)); } @@ -1128,18 +1125,6 @@ public: return __emplace_hint_multi(__p, 
std::forward<_Vp>(__v)); } - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void - __insert_multi_from_orphaned_node(const_iterator __p, __get_node_value_type_t<_Tp>&& __value) { - using __key_type = typename _NodeTypes::key_type; - __emplace_hint_multi(__p, const_cast<__key_type&&>(__value.first), std::move(__value.second)); - } - - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI void __insert_multi_from_orphaned_node(const_iterator __p, _Tp&& __value) { - __emplace_hint_multi(__p, std::move(__value)); - } - _LIBCPP_HIDE_FROM_ABI pair __node_assign_unique(const __container_value_type& __v, __node_pointer __dest); @@ -1281,21 +1266,6 @@ private: } _LIBCPP_HIDE_FROM_ABI void __move_assign_alloc(__tree&, false_type) _NOEXCEPT {} - template >, int> = 0> - _LIBCPP_HIDE_FROM_ABI static void __assign_value(__get_node_value_type_t& __lhs, _From&& __rhs) { - using __key_type = typename _NodeTypes::key_type; - - // This is technically UB, since the object was constructed as `const`. - // Clang doesn't optimize on this currently though. 
- const_cast<__key_type&>(__lhs.first) = const_cast<__copy_cvref_t<_From, __key_type>&&>(__rhs.first); - __lhs.second = std::forward<_From>(__rhs).second; - } - - template >, int> = 0> - _LIBCPP_HIDE_FROM_ABI static void __assign_value(_To& __lhs, _From&& __rhs) { - __lhs = std::forward<_From>(__rhs); - } - struct _DetachedTreeCache { _LIBCPP_HIDE_FROM_ABI explicit _DetachedTreeCache(__tree* __t) _NOEXCEPT : __t_(__t), @@ -1436,14 +1406,14 @@ void __tree<_Tp, _Compare, _Allocator>::__assign_multi(_InputIterator __first, _ if (size() != 0) { _DetachedTreeCache __cache(this); for (; __cache.__get() && __first != __last; ++__first) { - __assign_value(__cache.__get()->__value_, *__first); + __cache.__get()->__value_ = *__first; __node_insert_multi(__cache.__get()); __cache.__advance(); } } const_iterator __e = end(); for (; __first != __last; ++__first) - __insert_multi(__e, *__first); + __insert_multi(__e, _NodeTypes::__get_value(*__first)); } template @@ -1522,14 +1492,13 @@ void __tree<_Tp, _Compare, _Allocator>::__move_assign(__tree& __t, false_type) { if (size() != 0) { _DetachedTreeCache __cache(this); while (__cache.__get() != nullptr && __t.size() != 0) { - __assign_value(__cache.__get()->__value_, std::move(__t.remove(__t.begin())->__value_)); + __cache.__get()->__value_ = std::move(__t.remove(__t.begin())->__value_); __node_insert_multi(__cache.__get()); __cache.__advance(); } } - while (__t.size() != 0) { - __insert_multi_from_orphaned_node(__e, std::move(__t.remove(__t.begin())->__value_)); - } + while (__t.size() != 0) + __insert_multi(__e, _NodeTypes::__move(__t.remove(__t.begin())->__value_)); } } @@ -1555,7 +1524,7 @@ void __tree<_Tp, _Compare, _Allocator>::destroy(__node_pointer __nd) _NOEXCEPT { destroy(static_cast<__node_pointer>(__nd->__left_)); destroy(static_cast<__node_pointer>(__nd->__right_)); __node_allocator& __na = __node_alloc(); - __node_traits::destroy(__na, std::addressof(__nd->__value_)); + __node_traits::destroy(__na, 
_NodeTypes::__get_ptr(__nd->__value_)); __node_traits::deallocate(__na, __nd, 1); } } @@ -1825,9 +1794,10 @@ template template typename __tree<_Tp, _Compare, _Allocator>::__node_holder __tree<_Tp, _Compare, _Allocator>::__construct_node(_Args&&... __args) { + static_assert(!__is_tree_value_type<_Args...>::value, "Cannot construct from __value_type"); __node_allocator& __na = __node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, std::addressof(__h->__value_), std::forward<_Args>(__args)...); + __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__value_), std::forward<_Args>(__args)...); __h.get_deleter().__value_constructed = true; return __h; } @@ -1895,7 +1865,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_assign_unique(const __container_value_ __node_pointer __r = static_cast<__node_pointer>(__child); bool __inserted = false; if (__child == nullptr) { - __assign_value(__nd->__value_, __v); + __nd->__value_ = __v; __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); __r = __nd; __inserted = true; @@ -2057,7 +2027,7 @@ typename __tree<_Tp, _Compare, _Allocator>::iterator __tree<_Tp, _Compare, _Allo __node_pointer __np = __p.__get_np(); iterator __r = __remove_node_pointer(__np); __node_allocator& __na = __node_alloc(); - __node_traits::destroy(__na, std::addressof(const_cast<__node_value_type&>(*__p))); + __node_traits::destroy(__na, _NodeTypes::__get_ptr(const_cast<__node_value_type&>(*__p))); __node_traits::deallocate(__na, __np, 1); return __r; } diff --git a/libcxx/include/map b/libcxx/include/map index 1f650d4f4c3d5..a244696295fb8 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -593,6 +593,7 @@ erase_if(multimap& c, Predicate pred); // C++20 # include <__memory/pointer_traits.h> # include <__memory/unique_ptr.h> # include <__memory_resource/polymorphic_allocator.h> +# include <__new/launder.h> # include <__node_handle> # include <__ranges/concepts.h> # include 
<__ranges/container_compatible_range.h> @@ -644,13 +645,13 @@ public: : _Compare(__c) {} _LIBCPP_HIDE_FROM_ABI const _Compare& key_comp() const _NOEXCEPT { return *this; } _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _CP& __y) const { - return static_cast(*this)(__x.first, __y.first); + return static_cast(*this)(__x.__get_value().first, __y.__get_value().first); } _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _Key& __y) const { - return static_cast(*this)(__x.first, __y); + return static_cast(*this)(__x.__get_value().first, __y); } _LIBCPP_HIDE_FROM_ABI bool operator()(const _Key& __x, const _CP& __y) const { - return static_cast(*this)(__x, __y.first); + return static_cast(*this)(__x, __y.__get_value().first); } _LIBCPP_HIDE_FROM_ABI void swap(__map_value_compare& __y) _NOEXCEPT_(__is_nothrow_swappable_v<_Compare>) { using std::swap; @@ -660,12 +661,12 @@ public: # if _LIBCPP_STD_VER >= 14 template _LIBCPP_HIDE_FROM_ABI bool operator()(const _K2& __x, const _CP& __y) const { - return static_cast(*this)(__x, __y.first); + return static_cast(*this)(__x, __y.__get_value().first); } template _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _K2& __y) const { - return static_cast(*this)(__x.first, __y); + return static_cast(*this)(__x.__get_value().first, __y); } # endif }; @@ -681,9 +682,15 @@ public: : __comp_(__c) {} _LIBCPP_HIDE_FROM_ABI const _Compare& key_comp() const _NOEXCEPT { return __comp_; } - _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _CP& __y) const { return __comp_(__x.first, __y.first); } - _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _Key& __y) const { return __comp_(__x.first, __y); } - _LIBCPP_HIDE_FROM_ABI bool operator()(const _Key& __x, const _CP& __y) const { return __comp_(__x, __y.first); } + _LIBCPP_HIDE_FROM_ABI bool operator()(const _CP& __x, const _CP& __y) const { + return __comp_(__x.__get_value().first, __y.__get_value().first); + } + _LIBCPP_HIDE_FROM_ABI bool 
operator()(const _CP& __x, const _Key& __y) const { + return __comp_(__x.__get_value().first, __y); + } + _LIBCPP_HIDE_FROM_ABI bool operator()(const _Key& __x, const _CP& __y) const { + return __comp_(__x, __y.__get_value().first); + } void swap(__map_value_compare& __y) _NOEXCEPT_(__is_nothrow_swappable_v<_Compare>) { using std::swap; swap(__comp_, __y.__comp_); @@ -742,9 +749,9 @@ public: _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT { if (__second_constructed) - __alloc_traits::destroy(__na_, std::addressof(__p->__value_.second)); + __alloc_traits::destroy(__na_, std::addressof(__p->__value_.__get_value().second)); if (__first_constructed) - __alloc_traits::destroy(__na_, std::addressof(__p->__value_.first)); + __alloc_traits::destroy(__na_, std::addressof(__p->__value_.__get_value().first)); if (__p) __alloc_traits::deallocate(__na_, __p, 1); } @@ -753,8 +760,90 @@ public: template class __map_const_iterator; +# ifndef _LIBCPP_CXX03_LANG + template -struct __value_type; +struct _LIBCPP_STANDALONE_DEBUG __value_type { + typedef _Key key_type; + typedef _Tp mapped_type; + typedef pair value_type; + typedef pair __nc_ref_pair_type; + typedef pair __nc_rref_pair_type; + +private: + value_type __cc_; + +public: + _LIBCPP_HIDE_FROM_ABI value_type& __get_value() { +# if _LIBCPP_STD_VER >= 17 + return *std::launder(std::addressof(__cc_)); +# else + return __cc_; +# endif + } + + _LIBCPP_HIDE_FROM_ABI const value_type& __get_value() const { +# if _LIBCPP_STD_VER >= 17 + return *std::launder(std::addressof(__cc_)); +# else + return __cc_; +# endif + } + + _LIBCPP_HIDE_FROM_ABI __nc_ref_pair_type __ref() { + value_type& __v = __get_value(); + return __nc_ref_pair_type(const_cast(__v.first), __v.second); + } + + _LIBCPP_HIDE_FROM_ABI __nc_rref_pair_type __move() { + value_type& __v = __get_value(); + return __nc_rref_pair_type(std::move(const_cast(__v.first)), std::move(__v.second)); + } + + _LIBCPP_HIDE_FROM_ABI __value_type& operator=(const __value_type& 
__v) { + __ref() = __v.__get_value(); + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __value_type& operator=(__value_type&& __v) { + __ref() = __v.__move(); + return *this; + } + + template ::value, int> = 0> + _LIBCPP_HIDE_FROM_ABI __value_type& operator=(_ValueTp&& __v) { + __ref() = std::forward<_ValueTp>(__v); + return *this; + } + + __value_type() = delete; + ~__value_type() = delete; + __value_type(const __value_type&) = delete; + __value_type(__value_type&&) = delete; +}; + +# else + +template +struct __value_type { + typedef _Key key_type; + typedef _Tp mapped_type; + typedef pair value_type; + +private: + value_type __cc_; + +public: + _LIBCPP_HIDE_FROM_ABI value_type& __get_value() { return __cc_; } + _LIBCPP_HIDE_FROM_ABI const value_type& __get_value() const { return __cc_; } + + __value_type() = delete; + __value_type(__value_type const&) = delete; + __value_type& operator=(__value_type const&) = delete; + ~__value_type() = delete; +}; + +# endif // _LIBCPP_CXX03_LANG template struct __extract_key_value_types; @@ -783,8 +872,8 @@ public: _LIBCPP_HIDE_FROM_ABI __map_iterator(_TreeIterator __i) _NOEXCEPT : __i_(__i) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return *__i_; } - _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(*__i_); } + _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __i_->__get_value(); } + _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(__i_->__get_value()); } _LIBCPP_HIDE_FROM_ABI __map_iterator& operator++() { ++__i_; @@ -841,8 +930,8 @@ public: _LIBCPP_HIDE_FROM_ABI __map_const_iterator(__map_iterator< typename _TreeIterator::__non_const_iterator> __i) _NOEXCEPT : __i_(__i.__i_) {} - _LIBCPP_HIDE_FROM_ABI reference operator*() const { return *__i_; } - _LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(*__i_); } + _LIBCPP_HIDE_FROM_ABI reference operator*() const { return __i_->__get_value(); } + 
_LIBCPP_HIDE_FROM_ABI pointer operator->() const { return pointer_traits::pointer_to(__i_->__get_value()); } _LIBCPP_HIDE_FROM_ABI __map_const_iterator& operator++() { ++__i_; @@ -910,7 +999,7 @@ public: private: typedef std::__value_type __value_type; - typedef __map_value_compare __vc; + typedef __map_value_compare __vc; typedef __rebind_alloc, __value_type> __allocator_type; typedef __tree<__value_type, __vc, __allocator_type> __base; typedef typename __base::__node_traits __node_traits; @@ -1216,7 +1305,7 @@ public: __tree_.__emplace_hint_unique_key_args(__h.__i_, __k, std::move(__k), std::forward<_Vp>(__v)); if (!__inserted) - __r->second = std::forward<_Vp>(__v); + __r->__get_value().second = std::forward<_Vp>(__v); return __r; } @@ -1421,9 +1510,8 @@ map<_Key, _Tp, _Compare, _Allocator>::map(map&& __m, const allocator_type& __a) : __tree_(std::move(__m.__tree_), typename __base::allocator_type(__a)) { if (__a != __m.get_allocator()) { const_iterator __e = cend(); - while (!__m.empty()) { - __tree_.__insert_unique_from_orphaned_node(__e.__i_, std::move(__m.__tree_.remove(__m.begin().__i_)->__value_)); - } + while (!__m.empty()) + __tree_.__insert_unique(__e.__i_, __m.__tree_.remove(__m.begin().__i_)->__value_.__move()); } } @@ -1431,7 +1519,8 @@ template _Tp& map<_Key, _Tp, _Compare, _Allocator>::operator[](const key_type& __k) { return __tree_ .__emplace_unique_key_args(__k, std::piecewise_construct, std::forward_as_tuple(__k), std::forward_as_tuple()) - .first->second; + .first->__get_value() + .second; } template @@ -1441,7 +1530,8 @@ _Tp& map<_Key, _Tp, _Compare, _Allocator>::operator[](key_type&& __k) { return __tree_ .__emplace_unique_key_args( __k, std::piecewise_construct, std::forward_as_tuple(std::move(__k)), std::forward_as_tuple()) - .first->second; + .first->__get_value() + .second; // NOLINTEND(bugprone-use-after-move) } @@ -1452,9 +1542,9 @@ typename map<_Key, _Tp, _Compare, _Allocator>::__node_holder map<_Key, _Tp, _Compare, 
_Allocator>::__construct_node_with_key(const key_type& __k) { __node_allocator& __na = __tree_.__node_alloc(); __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, std::addressof(__h->__value_.first), __k); + __node_traits::construct(__na, std::addressof(__h->__value_.__get_value().first), __k); __h.get_deleter().__first_constructed = true; - __node_traits::construct(__na, std::addressof(__h->__value_.second)); + __node_traits::construct(__na, std::addressof(__h->__value_.__get_value().second)); __h.get_deleter().__second_constructed = true; return __h; } @@ -1469,7 +1559,7 @@ _Tp& map<_Key, _Tp, _Compare, _Allocator>::operator[](const key_type& __k) { __tree_.__insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__h.get())); __r = __h.release(); } - return __r->__value_.second; + return __r->__value_.__get_value().second; } # endif // _LIBCPP_CXX03_LANG @@ -1480,7 +1570,7 @@ _Tp& map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) { __node_base_pointer& __child = __tree_.__find_equal(__parent, __k); if (__child == nullptr) std::__throw_out_of_range("map::at: key not found"); - return static_cast<__node_pointer>(__child)->__value_.second; + return static_cast<__node_pointer>(__child)->__value_.__get_value().second; } template @@ -1489,7 +1579,7 @@ const _Tp& map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) const { __node_base_pointer __child = __tree_.__find_equal(__parent, __k); if (__child == nullptr) std::__throw_out_of_range("map::at: key not found"); - return static_cast<__node_pointer>(__child)->__value_.second; + return static_cast<__node_pointer>(__child)->__value_.__get_value().second; } template @@ -1595,7 +1685,7 @@ public: private: typedef std::__value_type __value_type; - typedef __map_value_compare __vc; + typedef __map_value_compare __vc; typedef __rebind_alloc, __value_type> __allocator_type; typedef __tree<__value_type, __vc, __allocator_type> __base; typedef typename 
__base::__node_traits __node_traits; @@ -2010,7 +2100,7 @@ multimap<_Key, _Tp, _Compare, _Allocator>::multimap(multimap&& __m, const alloca if (__a != __m.get_allocator()) { const_iterator __e = cend(); while (!__m.empty()) - __tree_.__insert_multi_from_orphaned_node(__e.__i_, std::move(__m.__tree_.remove(__m.begin().__i_)->__value_)); + __tree_.__insert_multi(__e.__i_, std::move(__m.__tree_.remove(__m.begin().__i_)->__value_.__move())); } } # endif diff --git a/libcxx/include/print b/libcxx/include/print index be05d30e0147f..61c3ebcd98cb8 100644 --- a/libcxx/include/print +++ b/libcxx/include/print @@ -123,7 +123,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt& __out_it, char32_t __value _LIBCPP_ASSERT_UNCATEGORIZED(__is_scalar_value(__value), "an invalid unicode scalar value results in invalid UTF-16"); if (__value < 0x10000) { - *__out_it++ = static_cast>(__value); + *__out_it++ = __value; return; } diff --git a/libcxx/src/.clang-tidy b/libcxx/src/.clang-tidy index 3d5493d965558..ec8f2e0a76a3c 100644 --- a/libcxx/src/.clang-tidy +++ b/libcxx/src/.clang-tidy @@ -1,18 +1,4 @@ InheritParentConfig: true Checks: > - -clang-analyzer-*, - - -llvm-include-order, - - -modernize-loop-convert, - -modernize-use-equals-delete, - -modernize-use-nullptr, - -modernize-use-override, - - -readability-identifier-naming, - -readability-function-cognitive-complexity, - -readability-function-size, - -readability-simplify-boolean-expr, - -# TODO: Consider enabling clang-analyzer. Without the checks clang-tidy runs 18x faster on my system. 
+ -readability-identifier-naming diff --git a/libcxx/test/benchmarks/algorithms/pstl.stable_sort.bench.cpp b/libcxx/test/benchmarks/algorithms/pstl.stable_sort.bench.cpp new file mode 100644 index 0000000000000..a385185ec7fe5 --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/pstl.stable_sort.bench.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-pstl + +#include +#include + +#include "common.h" + +namespace { +template +struct StableSort { + size_t Quantity; + + void run(benchmark::State& state) const { + runOpOnCopies(state, Quantity, Order(), BatchSize::CountBatch, [](auto& Copy) { + std::stable_sort(std::execution::par, Copy.begin(), Copy.end()); + }); + } + + bool skip() const { return Order() == ::Order::Heap; } + + std::string name() const { + return "BM_pstl_stable_sort" + ValueType::name() + Order::name() + "/" + std::to_string(Quantity); + } +}; +} // namespace + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) + return 1; + makeCartesianProductBenchmark(Quantities); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/libcxx/test/benchmarks/algorithms/ranges_sort.bench.cpp b/libcxx/test/benchmarks/algorithms/ranges_sort.bench.cpp new file mode 100644 index 0000000000000..d145a159a21fd --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/ranges_sort.bench.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include + +#include "common.h" + +namespace { +template +struct Sort { + size_t Quantity; + + void run(benchmark::State& state) const { + runOpOnCopies(state, Quantity, Order(), BatchSize::CountElements, [](auto& Copy) { + std::ranges::sort(Copy); + }); + } + + bool skip() const { return Order() == ::Order::Heap; } + + std::string name() const { + return "BM_RangesSort" + ValueType::name() + Order::name() + "_" + std::to_string(Quantity); + } +}; +} // namespace + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) + return 1; + makeCartesianProductBenchmark(Quantities); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/libcxx/test/benchmarks/algorithms/ranges_stable_sort.bench.cpp b/libcxx/test/benchmarks/algorithms/ranges_stable_sort.bench.cpp new file mode 100644 index 0000000000000..acc2f3f755fb8 --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/ranges_stable_sort.bench.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include + +#include "common.h" + +namespace { +template +struct StableSort { + size_t Quantity; + + void run(benchmark::State& state) const { + runOpOnCopies(state, Quantity, Order(), BatchSize::CountElements, [](auto& Copy) { + std::ranges::stable_sort(Copy); + }); + } + + bool skip() const { return Order() == ::Order::Heap; } + + std::string name() const { + return "BM_RangesStableSort" + ValueType::name() + Order::name() + "_" + std::to_string(Quantity); + } +}; +} // namespace + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) + return 1; + makeCartesianProductBenchmark(Quantities); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/libcxx/test/benchmarks/algorithms/sort.bench.cpp b/libcxx/test/benchmarks/algorithms/sort.bench.cpp new file mode 100644 index 0000000000000..7f3ce6ff7a07e --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/sort.bench.cpp @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include + +#include "common.h" + +namespace { +template +struct Sort { + size_t Quantity; + + void run(benchmark::State& state) const { + runOpOnCopies(state, Quantity, Order(), BatchSize::CountElements, [](auto& Copy) { + std::sort(Copy.begin(), Copy.end()); + }); + } + + bool skip() const { return Order() == ::Order::Heap; } + + std::string name() const { return "BM_Sort" + ValueType::name() + Order::name() + "_" + std::to_string(Quantity); }; +}; +} // namespace + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) + return 1; + makeCartesianProductBenchmark(Quantities); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/libcxx/test/benchmarks/algorithms/sorting/common.h b/libcxx/test/benchmarks/algorithms/sorting/common.h deleted file mode 100644 index 8195e9a2dc8d0..0000000000000 --- a/libcxx/test/benchmarks/algorithms/sorting/common.h +++ /dev/null @@ -1,141 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LIBCXX_TEST_BENCHMARKS_ALGORITHMS_SORTING_COMMON_H -#define LIBCXX_TEST_BENCHMARKS_ALGORITHMS_SORTING_COMMON_H - -#include -#include -#include -#include -#include -#include - -namespace support { - -// This function creates a vector with N int-like values. -// -// These values are arranged in such a way that they would invoke O(N^2) -// behavior on any quick sort implementation that satisifies certain conditions. 
-// Details are available in the following paper: -// -// "A Killer Adversary for Quicksort", M. D. McIlroy, Software-Practice & -// Experience Volume 29 Issue 4 April 10, 1999 pp 341-344. -// https://dl.acm.org/doi/10.5555/311868.311871. -template -std::vector quicksort_adversarial_data(std::size_t n) { - static_assert(std::is_integral_v); - assert(n > 0); - - // If an element is equal to gas, it indicates that the value of the element - // is still to be decided and may change over the course of time. - T gas = n - 1; - - std::vector v; - v.resize(n); - for (unsigned int i = 0; i < n; ++i) { - v[i] = gas; - } - // Candidate for the pivot position. - int candidate = 0; - int nsolid = 0; - // Populate all positions in the generated input to gas. - std::vector ascending_values(v.size()); - - // Fill up with ascending values from 0 to v.size()-1. These will act as - // indices into v. - std::iota(ascending_values.begin(), ascending_values.end(), 0); - std::sort(ascending_values.begin(), ascending_values.end(), [&](int x, int y) { - if (v[x] == gas && v[y] == gas) { - // We are comparing two inputs whose value is still to be decided. 
- if (x == candidate) { - v[x] = nsolid++; - } else { - v[y] = nsolid++; - } - } - if (v[x] == gas) { - candidate = x; - } else if (v[y] == gas) { - candidate = y; - } - return v[x] < v[y]; - }); - return v; -} - -// ascending sorted values -template -std::vector ascending_sorted_data(std::size_t n) { - std::vector v(n); - std::iota(v.begin(), v.end(), 0); - return v; -} - -// descending sorted values -template -std::vector descending_sorted_data(std::size_t n) { - std::vector v(n); - std::iota(v.begin(), v.end(), 0); - std::reverse(v.begin(), v.end()); - return v; -} - -// pipe-organ pattern -template -std::vector pipe_organ_data(std::size_t n) { - std::vector v(n); - std::iota(v.begin(), v.end(), 0); - auto half = v.begin() + v.size() / 2; - std::reverse(half, v.end()); - return v; -} - -// heap pattern -template -std::vector heap_data(std::size_t n) { - std::vector v(n); - std::iota(v.begin(), v.end(), 0); - std::make_heap(v.begin(), v.end()); - return v; -} - -// shuffled randomly -template -std::vector shuffled_data(std::size_t n) { - std::vector v(n); - std::iota(v.begin(), v.end(), 0); - std::mt19937 rng; - std::shuffle(v.begin(), v.end(), rng); - return v; -} - -// single element in the whole sequence -template -std::vector single_element_data(std::size_t n) { - std::vector v(n); - return v; -} - -struct NonIntegral { - NonIntegral() : value_(0) {} - NonIntegral(int i) : value_(i) {} - friend auto operator<(NonIntegral const& a, NonIntegral const& b) { return a.value_ < b.value_; } - friend auto operator>(NonIntegral const& a, NonIntegral const& b) { return a.value_ > b.value_; } - friend auto operator<=(NonIntegral const& a, NonIntegral const& b) { return a.value_ <= b.value_; } - friend auto operator>=(NonIntegral const& a, NonIntegral const& b) { return a.value_ >= b.value_; } - friend auto operator==(NonIntegral const& a, NonIntegral const& b) { return a.value_ == b.value_; } - friend auto operator!=(NonIntegral const& a, NonIntegral const& b) { return 
a.value_ != b.value_; } - -private: - int value_; -}; - -} // namespace support - -#endif // LIBCXX_TEST_BENCHMARKS_ALGORITHMS_SORTING_COMMON_H diff --git a/libcxx/test/benchmarks/algorithms/sorting/is_sorted.bench.cpp b/libcxx/test/benchmarks/algorithms/sorting/is_sorted.bench.cpp deleted file mode 100644 index 6e553e93d017c..0000000000000 --- a/libcxx/test/benchmarks/algorithms/sorting/is_sorted.bench.cpp +++ /dev/null @@ -1,82 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -#include -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "../../GenerateInput.h" - -int main(int argc, char** argv) { - auto std_is_sorted = [](auto first, auto last) { return std::is_sorted(first, last); }; - auto std_is_sorted_pred = [](auto first, auto last) { - return std::is_sorted(first, last, [](auto x, auto y) { - benchmark::DoNotOptimize(x); - benchmark::DoNotOptimize(y); - return x < y; - }); - }; - auto ranges_is_sorted_pred = [](auto first, auto last) { - return std::ranges::is_sorted(first, last, [](auto x, auto y) { - benchmark::DoNotOptimize(x); - benchmark::DoNotOptimize(y); - return x < y; - }); - }; - - // Benchmark {std,ranges}::is_sorted on a sorted sequence (the worst case). 
- { - auto bm = [](std::string name, auto is_sorted) { - benchmark::RegisterBenchmark( - name, - [is_sorted](auto& st) { - std::size_t const size = st.range(0); - using ValueType = typename Container::value_type; - std::vector data; - std::generate_n(std::back_inserter(data), size, [] { return Generate::random(); }); - std::sort(data.begin(), data.end()); - - Container c(data.begin(), data.end()); - - for ([[maybe_unused]] auto _ : st) { - benchmark::DoNotOptimize(c); - auto result = is_sorted(c.begin(), c.end()); - benchmark::DoNotOptimize(result); - } - }) - ->Arg(8) - ->Arg(1024) - ->Arg(8192); - }; - bm.operator()>("std::is_sorted(vector)", std_is_sorted); - bm.operator()>("std::is_sorted(deque)", std_is_sorted); - bm.operator()>("std::is_sorted(list)", std_is_sorted); - bm.operator()>("rng::is_sorted(vector)", std::ranges::is_sorted); - bm.operator()>("rng::is_sorted(deque)", std::ranges::is_sorted); - bm.operator()>("rng::is_sorted(list)", std::ranges::is_sorted); - - bm.operator()>("std::is_sorted(vector, pred)", std_is_sorted_pred); - bm.operator()>("std::is_sorted(deque, pred)", std_is_sorted_pred); - bm.operator()>("std::is_sorted(list, pred)", std_is_sorted_pred); - bm.operator()>("rng::is_sorted(vector, pred)", ranges_is_sorted_pred); - bm.operator()>("rng::is_sorted(deque, pred)", ranges_is_sorted_pred); - bm.operator()>("rng::is_sorted(list, pred)", ranges_is_sorted_pred); - } - - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} diff --git a/libcxx/test/benchmarks/algorithms/sorting/is_sorted_until.bench.cpp b/libcxx/test/benchmarks/algorithms/sorting/is_sorted_until.bench.cpp deleted file mode 100644 index ab11ee35327c7..0000000000000 --- a/libcxx/test/benchmarks/algorithms/sorting/is_sorted_until.bench.cpp +++ /dev/null @@ -1,82 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -#include -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "../../GenerateInput.h" - -int main(int argc, char** argv) { - auto std_is_sorted_until = [](auto first, auto last) { return std::is_sorted_until(first, last); }; - auto std_is_sorted_until_pred = [](auto first, auto last) { - return std::is_sorted_until(first, last, [](auto x, auto y) { - benchmark::DoNotOptimize(x); - benchmark::DoNotOptimize(y); - return x < y; - }); - }; - auto ranges_is_sorted_until_pred = [](auto first, auto last) { - return std::ranges::is_sorted_until(first, last, [](auto x, auto y) { - benchmark::DoNotOptimize(x); - benchmark::DoNotOptimize(y); - return x < y; - }); - }; - - // Benchmark {std,ranges}::is_sorted_until on a sorted sequence (the worst case). 
- { - auto bm = [](std::string name, auto is_sorted_until) { - benchmark::RegisterBenchmark( - name, - [is_sorted_until](auto& st) { - std::size_t const size = st.range(0); - using ValueType = typename Container::value_type; - std::vector data; - std::generate_n(std::back_inserter(data), size, [] { return Generate::random(); }); - std::sort(data.begin(), data.end()); - - Container c(data.begin(), data.end()); - - for ([[maybe_unused]] auto _ : st) { - benchmark::DoNotOptimize(c); - auto result = is_sorted_until(c.begin(), c.end()); - benchmark::DoNotOptimize(result); - } - }) - ->Arg(8) - ->Arg(1024) - ->Arg(8192); - }; - bm.operator()>("std::is_sorted_until(vector)", std_is_sorted_until); - bm.operator()>("std::is_sorted_until(deque)", std_is_sorted_until); - bm.operator()>("std::is_sorted_until(list)", std_is_sorted_until); - bm.operator()>("rng::is_sorted_until(vector)", std::ranges::is_sorted_until); - bm.operator()>("rng::is_sorted_until(deque)", std::ranges::is_sorted_until); - bm.operator()>("rng::is_sorted_until(list)", std::ranges::is_sorted_until); - - bm.operator()>("std::is_sorted_until(vector, pred)", std_is_sorted_until_pred); - bm.operator()>("std::is_sorted_until(deque, pred)", std_is_sorted_until_pred); - bm.operator()>("std::is_sorted_until(list, pred)", std_is_sorted_until_pred); - bm.operator()>("rng::is_sorted_until(vector, pred)", ranges_is_sorted_until_pred); - bm.operator()>("rng::is_sorted_until(deque, pred)", ranges_is_sorted_until_pred); - bm.operator()>("rng::is_sorted_until(list, pred)", ranges_is_sorted_until_pred); - } - - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} diff --git a/libcxx/test/benchmarks/algorithms/sorting/partial_sort.bench.cpp b/libcxx/test/benchmarks/algorithms/sorting/partial_sort.bench.cpp deleted file mode 100644 index 7000be66920d0..0000000000000 --- a/libcxx/test/benchmarks/algorithms/sorting/partial_sort.bench.cpp +++ /dev/null @@ -1,95 +0,0 
@@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "common.h" - -int main(int argc, char** argv) { - auto std_partial_sort = [](auto first, auto mid, auto last) { return std::partial_sort(first, mid, last); }; - - // Benchmark {std,ranges}::partial_sort on various types of data. We always partially sort only - // half of the full range. - // - // We perform this benchmark in a batch because we need to restore the - // state of the container after the operation. - // - // Also note that we intentionally don't benchmark the predicated version of the algorithm - // because that makes the benchmark run too slowly. 
- { - auto bm = [](std::string name, auto partial_sort, auto generate_data) { - benchmark::RegisterBenchmark( - name, - [partial_sort, generate_data](auto& st) { - std::size_t const size = st.range(0); - constexpr std::size_t BatchSize = 32; - using ValueType = typename Container::value_type; - std::vector data = generate_data(size); - std::array c; - std::fill_n(c.begin(), BatchSize, Container(data.begin(), data.end())); - - std::size_t const half = size / 2; - while (st.KeepRunningBatch(BatchSize)) { - for (std::size_t i = 0; i != BatchSize; ++i) { - benchmark::DoNotOptimize(c[i]); - partial_sort(c[i].begin(), c[i].begin() + half, c[i].end()); - benchmark::DoNotOptimize(c[i]); - } - - st.PauseTiming(); - for (std::size_t i = 0; i != BatchSize; ++i) { - std::copy(data.begin(), data.end(), c[i].begin()); - } - st.ResumeTiming(); - } - }) - ->Arg(8) - ->Arg(1024) - ->Arg(8192); - }; - - auto register_bm = [&](auto generate, std::string variant) { - auto gen2 = [generate](auto size) { - std::vector data = generate(size); - std::vector real_data(data.begin(), data.end()); - return real_data; - }; - auto name = [variant](std::string op) { return op + " (" + variant + ")"; }; - bm.operator()>(name("std::partial_sort(vector"), std_partial_sort, generate); - bm.operator()>( - name("std::partial_sort(vector"), std_partial_sort, gen2); - bm.operator()>(name("std::partial_sort(deque"), std_partial_sort, generate); - - bm.operator()>(name("rng::partial_sort(vector"), std::ranges::partial_sort, generate); - bm.operator()>( - name("rng::partial_sort(vector"), std::ranges::partial_sort, gen2); - bm.operator()>(name("rng::partial_sort(deque"), std::ranges::partial_sort, generate); - }; - - register_bm(support::quicksort_adversarial_data, "qsort adversarial"); - register_bm(support::ascending_sorted_data, "ascending"); - register_bm(support::descending_sorted_data, "descending"); - register_bm(support::pipe_organ_data, "pipe-organ"); - register_bm(support::heap_data, "heap"); - 
register_bm(support::shuffled_data, "shuffled"); - register_bm(support::single_element_data, "repeated"); - } - - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} diff --git a/libcxx/test/benchmarks/algorithms/sorting/partial_sort_copy.bench.cpp b/libcxx/test/benchmarks/algorithms/sorting/partial_sort_copy.bench.cpp deleted file mode 100644 index 2ebc286b1c03b..0000000000000 --- a/libcxx/test/benchmarks/algorithms/sorting/partial_sort_copy.bench.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "common.h" - -int main(int argc, char** argv) { - auto std_partial_sort_copy = [](auto first, auto last, auto dfirst, auto dlast) { - return std::partial_sort_copy(first, last, dfirst, dlast); - }; - - // Benchmark {std,ranges}::partial_sort_copy on various types of data. We always partially - // sort only half of the full range. - // - // Also note that we intentionally don't benchmark the predicated version of the algorithm - // because that makes the benchmark run too slowly. 
- { - auto bm = [](std::string name, auto partial_sort_copy, auto generate_data) { - benchmark::RegisterBenchmark( - name, - [partial_sort_copy, generate_data](auto& st) { - std::size_t const size = st.range(0); - using ValueType = typename Container::value_type; - std::vector data = generate_data(size); - Container c(data.begin(), data.end()); - std::vector out(size / 2); - - for ([[maybe_unused]] auto _ : st) { - benchmark::DoNotOptimize(c); - benchmark::DoNotOptimize(out); - auto result = partial_sort_copy(c.begin(), c.end(), out.begin(), out.end()); - benchmark::DoNotOptimize(result); - } - }) - ->Arg(8) - ->Arg(1024) - ->Arg(8192); - }; - - auto register_bm = [&](auto generate, std::string variant) { - auto gen2 = [generate](auto size) { - std::vector data = generate(size); - std::vector real_data(data.begin(), data.end()); - return real_data; - }; - auto name = [variant](std::string op) { return op + " (" + variant + ")"; }; - bm.operator()>(name("std::partial_sort_copy(vector)"), std_partial_sort_copy, generate); - bm.operator()>( - name("std::partial_sort_copy(vector)"), std_partial_sort_copy, gen2); - bm.operator()>(name("std::partial_sort_copy(deque)"), std_partial_sort_copy, generate); - bm.operator()>(name("std::partial_sort_copy(list)"), std_partial_sort_copy, generate); - - bm.operator()>( - name("rng::partial_sort_copy(vector)"), std::ranges::partial_sort_copy, generate); - bm.operator()>( - name("rng::partial_sort_copy(vector)"), std::ranges::partial_sort_copy, gen2); - bm.operator()>( - name("rng::partial_sort_copy(deque)"), std::ranges::partial_sort_copy, generate); - bm.operator()>( - name("rng::partial_sort_copy(list)"), std::ranges::partial_sort_copy, generate); - }; - - register_bm(support::quicksort_adversarial_data, "qsort adversarial"); - register_bm(support::ascending_sorted_data, "ascending"); - register_bm(support::descending_sorted_data, "descending"); - register_bm(support::pipe_organ_data, "pipe-organ"); - 
register_bm(support::heap_data, "heap"); - register_bm(support::shuffled_data, "shuffled"); - register_bm(support::single_element_data, "repeated"); - } - - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} diff --git a/libcxx/test/benchmarks/algorithms/sorting/sort.bench.cpp b/libcxx/test/benchmarks/algorithms/sorting/sort.bench.cpp deleted file mode 100644 index d12aa108fe123..0000000000000 --- a/libcxx/test/benchmarks/algorithms/sorting/sort.bench.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "common.h" - -int main(int argc, char** argv) { - auto std_sort = [](auto first, auto last) { return std::sort(first, last); }; - - // Benchmark {std,ranges}::sort on various types of data - // - // We perform this benchmark in a batch because we need to restore the - // state of the container after the operation. - // - // Also note that we intentionally don't benchmark the predicated version of the algorithm - // because that makes the benchmark run too slowly. 
- { - auto bm = [](std::string name, auto sort, auto generate_data) { - benchmark::RegisterBenchmark( - name, - [sort, generate_data](auto& st) { - std::size_t const size = st.range(0); - constexpr std::size_t BatchSize = 32; - using ValueType = typename Container::value_type; - std::vector data = generate_data(size); - std::array c; - std::fill_n(c.begin(), BatchSize, Container(data.begin(), data.end())); - - while (st.KeepRunningBatch(BatchSize)) { - for (std::size_t i = 0; i != BatchSize; ++i) { - benchmark::DoNotOptimize(c[i]); - sort(c[i].begin(), c[i].end()); - benchmark::DoNotOptimize(c[i]); - } - - st.PauseTiming(); - for (std::size_t i = 0; i != BatchSize; ++i) { - std::copy(data.begin(), data.end(), c[i].begin()); - } - st.ResumeTiming(); - } - }) - ->Arg(8) - ->Arg(1024) - ->Arg(8192); - }; - - auto register_bm = [&](auto generate, std::string variant) { - auto gen2 = [generate](auto size) { - std::vector data = generate(size); - std::vector real_data(data.begin(), data.end()); - return real_data; - }; - auto name = [variant](std::string op) { return op + " (" + variant + ")"; }; - bm.operator()>(name("std::sort(vector)"), std_sort, generate); - bm.operator()>(name("std::sort(vector)"), std_sort, gen2); - bm.operator()>(name("std::sort(deque)"), std_sort, generate); - - bm.operator()>(name("rng::sort(vector)"), std::ranges::sort, generate); - bm.operator()>(name("rng::sort(vector)"), std::ranges::sort, gen2); - bm.operator()>(name("rng::sort(deque)"), std::ranges::sort, generate); - }; - - register_bm(support::quicksort_adversarial_data, "qsort adversarial"); - register_bm(support::ascending_sorted_data, "ascending"); - register_bm(support::descending_sorted_data, "descending"); - register_bm(support::pipe_organ_data, "pipe-organ"); - register_bm(support::heap_data, "heap"); - register_bm(support::shuffled_data, "shuffled"); - register_bm(support::single_element_data, "repeated"); - } - - benchmark::Initialize(&argc, argv); - 
benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} diff --git a/libcxx/test/benchmarks/algorithms/sorting/stable_sort.bench.cpp b/libcxx/test/benchmarks/algorithms/sorting/stable_sort.bench.cpp deleted file mode 100644 index 8040f5c12a46a..0000000000000 --- a/libcxx/test/benchmarks/algorithms/sorting/stable_sort.bench.cpp +++ /dev/null @@ -1,159 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -#include -#include -#include -#include -#include -#include - -#include "benchmark/benchmark.h" -#include "common.h" -#include "count_new.h" - -int main(int argc, char** argv) { - auto std_stable_sort = [](auto first, auto last) { return std::stable_sort(first, last); }; - - // Benchmark {std,ranges}::stable_sort on various types of data - // - // We perform this benchmark in a batch because we need to restore the - // state of the container after the operation. - // - // Also note that we intentionally don't benchmark the predicated version of the algorithm - // because that makes the benchmark run too slowly. 
- { - auto bm = [](std::string name, auto stable_sort, auto generate_data) { - benchmark::RegisterBenchmark( - name, - [stable_sort, generate_data](auto& st) { - std::size_t const size = st.range(0); - constexpr std::size_t BatchSize = 32; - using ValueType = typename Container::value_type; - std::vector data = generate_data(size); - std::array c; - std::fill_n(c.begin(), BatchSize, Container(data.begin(), data.end())); - - while (st.KeepRunningBatch(BatchSize)) { - for (std::size_t i = 0; i != BatchSize; ++i) { - benchmark::DoNotOptimize(c[i]); - stable_sort(c[i].begin(), c[i].end()); - benchmark::DoNotOptimize(c[i]); - } - - st.PauseTiming(); - for (std::size_t i = 0; i != BatchSize; ++i) { - std::copy(data.begin(), data.end(), c[i].begin()); - } - st.ResumeTiming(); - } - }) - ->Arg(8) - ->Arg(1024) - ->Arg(8192); - }; - - auto register_bm = [&](auto generate, std::string variant) { - auto gen2 = [generate](auto size) { - std::vector data = generate(size); - std::vector real_data(data.begin(), data.end()); - return real_data; - }; - auto name = [variant](std::string op) { return op + " (" + variant + ")"; }; - bm.operator()>(name("std::stable_sort(vector)"), std_stable_sort, generate); - bm.operator()>( - name("std::stable_sort(vector)"), std_stable_sort, gen2); - bm.operator()>(name("std::stable_sort(deque)"), std_stable_sort, generate); - - bm.operator()>(name("rng::stable_sort(vector)"), std::ranges::stable_sort, generate); - bm.operator()>( - name("rng::stable_sort(vector)"), std::ranges::stable_sort, gen2); - bm.operator()>(name("rng::stable_sort(deque)"), std::ranges::stable_sort, generate); - }; - - register_bm(support::quicksort_adversarial_data, "qsort adversarial"); - register_bm(support::ascending_sorted_data, "ascending"); - register_bm(support::descending_sorted_data, "descending"); - register_bm(support::pipe_organ_data, "pipe-organ"); - register_bm(support::heap_data, "heap"); - register_bm(support::shuffled_data, "shuffled"); - 
register_bm(support::single_element_data, "repeated"); - } - - // Benchmark {std,ranges}::stable_sort when memory allocation fails. The algorithm must fall back to - // a different algorithm that has different complexity guarantees. - { - auto bm = [](std::string name, auto stable_sort, auto generate_data) { - benchmark::RegisterBenchmark( - name, - [stable_sort, generate_data](auto& st) { - std::size_t const size = st.range(0); - constexpr std::size_t BatchSize = 32; - using ValueType = typename Container::value_type; - std::vector data = generate_data(size); - std::array c; - std::fill_n(c.begin(), BatchSize, Container(data.begin(), data.end())); - - while (st.KeepRunningBatch(BatchSize)) { - for (std::size_t i = 0; i != BatchSize; ++i) { - benchmark::DoNotOptimize(c[i]); - // Disable the ability to allocate memory inside this block - globalMemCounter.throw_after = 0; - - stable_sort(c[i].begin(), c[i].end()); - benchmark::DoNotOptimize(c[i]); - - globalMemCounter.reset(); - } - - st.PauseTiming(); - for (std::size_t i = 0; i != BatchSize; ++i) { - std::copy(data.begin(), data.end(), c[i].begin()); - } - st.ResumeTiming(); - } - }) - ->Arg(8) - ->Arg(1024) - ->Arg(8192); - }; - - auto register_bm = [&](auto generate, std::string variant) { - auto gen2 = [generate](auto size) { - std::vector data = generate(size); - std::vector real_data(data.begin(), data.end()); - return real_data; - }; - auto name = [variant](std::string op) { return op + " (alloc fails, " + variant + ")"; }; - bm.operator()>(name("std::stable_sort(vector)"), std_stable_sort, generate); - bm.operator()>( - name("std::stable_sort(vector)"), std_stable_sort, gen2); - bm.operator()>(name("std::stable_sort(deque)"), std_stable_sort, generate); - - bm.operator()>(name("rng::stable_sort(vector)"), std::ranges::stable_sort, generate); - bm.operator()>( - name("rng::stable_sort(vector)"), std::ranges::stable_sort, gen2); - bm.operator()>(name("rng::stable_sort(deque)"), std::ranges::stable_sort, 
generate); - }; - - register_bm(support::quicksort_adversarial_data, "qsort adversarial"); - register_bm(support::ascending_sorted_data, "ascending"); - register_bm(support::descending_sorted_data, "descending"); - register_bm(support::pipe_organ_data, "pipe-organ"); - register_bm(support::heap_data, "heap"); - register_bm(support::shuffled_data, "shuffled"); - register_bm(support::single_element_data, "repeated"); - } - - benchmark::Initialize(&argc, argv); - benchmark::RunSpecifiedBenchmarks(); - benchmark::Shutdown(); - return 0; -} diff --git a/libcxx/test/benchmarks/algorithms/stable_sort.bench.cpp b/libcxx/test/benchmarks/algorithms/stable_sort.bench.cpp new file mode 100644 index 0000000000000..26e8de935f5c5 --- /dev/null +++ b/libcxx/test/benchmarks/algorithms/stable_sort.bench.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 + +#include + +#include "common.h" + +namespace { +template +struct StableSort { + size_t Quantity; + + void run(benchmark::State& state) const { + runOpOnCopies(state, Quantity, Order(), BatchSize::CountBatch, [](auto& Copy) { + std::stable_sort(Copy.begin(), Copy.end()); + }); + } + + bool skip() const { return Order() == ::Order::Heap; } + + std::string name() const { + return "BM_StableSort" + ValueType::name() + Order::name() + "_" + std::to_string(Quantity); + }; +}; +} // namespace + +int main(int argc, char** argv) { + benchmark::Initialize(&argc, argv); + if (benchmark::ReportUnrecognizedArguments(argc, argv)) + return 1; + makeCartesianProductBenchmark(Quantities); + benchmark::RunSpecifiedBenchmarks(); +} diff --git a/libcxx/test/benchmarks/format/format.bench.cpp b/libcxx/test/benchmarks/format/format.bench.cpp index 65caac747cbab..267ef22950668 100644 --- a/libcxx/test/benchmarks/format/format.bench.cpp +++ b/libcxx/test/benchmarks/format/format.bench.cpp @@ -35,15 +35,4 @@ BENCHMARK(BM_format_string)->RangeMultiplier(2)->Range(1, 1 << 20); BENCHMARK(BM_format_string)->RangeMultiplier(2)->Range(1, 1 << 20); #endif -template -static void BM_string_without_formatting(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(std::format(CSTR("Hello, World!"))); - } -} -BENCHMARK(BM_string_without_formatting); -#ifndef TEST_HAS_NO_WIDE_CHARACTERS -BENCHMARK(BM_string_without_formatting); -#endif - BENCHMARK_MAIN(); diff --git a/libcxx/test/configs/cmake-bridge.cfg.in b/libcxx/test/configs/cmake-bridge.cfg.in index d7d588669032d..61f821a7e4f6b 100644 --- a/libcxx/test/configs/cmake-bridge.cfg.in +++ b/libcxx/test/configs/cmake-bridge.cfg.in @@ -23,7 +23,6 @@ config.recursiveExpansionLimit = 10 config.test_exec_root = 
os.path.join('@LIBCXX_BINARY_DIR@', 'test') # Add substitutions for bootstrapping the test suite configuration -config.substitutions.append(('%{bin-dir}', '@LIBCXX_BINARY_DIR@')) config.substitutions.append(('%{libcxx-dir}', '@LIBCXX_SOURCE_DIR@')) config.substitutions.append(('%{install-prefix}', '@LIBCXX_TESTING_INSTALL_PREFIX@')) config.substitutions.append(('%{include-dir}', '@LIBCXX_TESTING_INSTALL_PREFIX@/@LIBCXX_INSTALL_INCLUDE_DIR@')) diff --git a/libcxx/test/libcxx/clang_tidy.sh.py b/libcxx/test/libcxx/clang_tidy.sh.py deleted file mode 100644 index 46f281f359209..0000000000000 --- a/libcxx/test/libcxx/clang_tidy.sh.py +++ /dev/null @@ -1,11 +0,0 @@ -# ===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ===----------------------------------------------------------------------===## - -# REQUIRES: has-clang-tidy - -# RUN: %{python} %{libcxx-dir}/../clang-tools-extra/clang-tidy/tool/run-clang-tidy.py -clang-tidy-binary %{clang-tidy} -warnings-as-errors "*" -source-filter=".*libcxx/src.*" -quiet -p %{bin-dir}/.. 
diff --git a/libcxx/test/libcxx/containers/associative/tree_key_value_traits.pass.cpp b/libcxx/test/libcxx/containers/associative/tree_key_value_traits.pass.cpp index 04dcb8f54fafc..e3a5a6f634138 100644 --- a/libcxx/test/libcxx/containers/associative/tree_key_value_traits.pass.cpp +++ b/libcxx/test/libcxx/containers/associative/tree_key_value_traits.pass.cpp @@ -21,6 +21,7 @@ void testKeyValueTrait() { typedef int Tp; typedef std::__tree_key_value_types Traits; static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert(Traits::__is_map == false, ""); } @@ -28,6 +29,7 @@ void testKeyValueTrait() { typedef std::pair Tp; typedef std::__tree_key_value_types Traits; static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert(Traits::__is_map == false, ""); } @@ -35,6 +37,7 @@ void testKeyValueTrait() { typedef std::pair Tp; typedef std::__tree_key_value_types Traits; static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert(Traits::__is_map == false, ""); } @@ -43,6 +46,7 @@ void testKeyValueTrait() { typedef std::__tree_key_value_types Traits; static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); static_assert((std::is_same >::value), ""); static_assert((std::is_same >::value), ""); static_assert(Traits::__is_map == true, ""); diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp index 85557ecbbfabc..93090ed6138f8 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/ranges.swap_ranges.pass.cpp @@ 
-30,40 +30,108 @@ #include "test_iterators.h" #include "type_algorithms.h" +constexpr void test_different_lengths() { + using Expected = std::ranges::swap_ranges_result; + int i[3] = {1, 2, 3}; + int j[1] = {4}; + std::same_as auto r = std::ranges::swap_ranges(i, i + 3, j, j + 1); + assert(r.in1 == i + 1); + assert(r.in2 == j + 1); + assert(std::ranges::equal(i, std::array{4, 2, 3})); + assert(std::ranges::equal(j, std::array{1})); + std::same_as auto r2 = std::ranges::swap_ranges(i, j); + assert(r2.in1 == i + 1); + assert(r2.in2 == j + 1); + assert(std::ranges::equal(i, std::array{1, 2, 3})); + assert(std::ranges::equal(j, std::array{4})); + std::same_as auto r3 = std::ranges::swap_ranges(j, j + 1, i, i + 3); + assert(r3.in1 == j + 1); + assert(r3.in2 == i + 1); + assert(std::ranges::equal(i, std::array{4, 2, 3})); + assert(std::ranges::equal(j, std::array{1})); + std::same_as auto r4 = std::ranges::swap_ranges(j, i); + assert(r4.in1 == j + 1); + assert(r4.in2 == i + 1); + assert(std::ranges::equal(i, std::array{1, 2, 3})); + assert(std::ranges::equal(j, std::array{4})); +} + +constexpr void test_range() { + std::array r1 = {1, 2, 3}; + std::array r2 = {4, 5, 6}; + + std::same_as::iterator, std::array::iterator>> auto r = + std::ranges::swap_ranges(r1, r2); + assert(r.in1 == r1.end()); + assert(r.in2 == r2.end()); + assert((r1 == std::array{4, 5, 6})); + assert((r2 == std::array{1, 2, 3})); +} + +constexpr void test_borrowed_input_range() { + { + int r1[] = {1, 2, 3}; + int r2[] = {4, 5, 6}; + std::ranges::swap_ranges(std::views::all(r1), r2); + assert(std::ranges::equal(r1, std::array{4, 5, 6})); + assert(std::ranges::equal(r2, std::array{1, 2, 3})); + } + { + int r1[] = {1, 2, 3}; + int r2[] = {4, 5, 6}; + std::ranges::swap_ranges(r1, std::views::all(r2)); + assert(std::ranges::equal(r1, std::array{4, 5, 6})); + assert(std::ranges::equal(r2, std::array{1, 2, 3})); + } + { + int r1[] = {1, 2, 3}; + int r2[] = {4, 5, 6}; + 
std::ranges::swap_ranges(std::views::all(r1), std::views::all(r2)); + assert(std::ranges::equal(r1, std::array{4, 5, 6})); + assert(std::ranges::equal(r2, std::array{1, 2, 3})); + } +} + +constexpr void test_sentinel() { + int i[3] = {1, 2, 3}; + int j[3] = {4, 5, 6}; + using It = cpp17_input_iterator; + using Sent = sentinel_wrapper; + using Expected = std::ranges::swap_ranges_result; + std::same_as auto r = std::ranges::swap_ranges(It(i), Sent(It(i + 3)), It(j), Sent(It(j + 3))); + assert(base(r.in1) == i + 3); + assert(base(r.in2) == j + 3); + assert(std::ranges::equal(i, std::array{4, 5, 6})); + assert(std::ranges::equal(j, std::array{1, 2, 3})); +} + template TEST_CONSTEXPR_CXX20 void test_iterators() { using Expected = std::ranges::swap_ranges_result; - { // Basic test case: swapping three elements between two arrays - int a[3] = {1, 2, 3}; - int b[3] = {4, 5, 6}; - std::same_as auto r = - std::ranges::swap_ranges(Iter1(a), sentinel_wrapper(Iter1(a + 3)), Iter2(b), sentinel_wrapper(Iter2(b + 3))); - assert(base(r.in1) == a + 3); - assert(base(r.in2) == b + 3); - assert(std::ranges::equal(a, std::array{4, 5, 6})); - assert(std::ranges::equal(b, std::array{1, 2, 3})); + int a[3] = {1, 2, 3}; + int b[3] = {4, 5, 6}; + std::same_as auto r = + std::ranges::swap_ranges(Iter1(a), sentinel_wrapper(Iter1(a + 3)), Iter2(b), sentinel_wrapper(Iter2(b + 3))); + assert(base(r.in1) == a + 3); + assert(base(r.in2) == b + 3); + assert(std::ranges::equal(a, std::array{4, 5, 6})); + assert(std::ranges::equal(b, std::array{1, 2, 3})); +} + +constexpr void test_rval_range() { + { + using Expected = std::ranges::swap_ranges_result::iterator, std::ranges::dangling>; + std::array r = {1, 2, 3}; + std::same_as auto a = std::ranges::swap_ranges(r, std::array{4, 5, 6}); + assert((r == std::array{4, 5, 6})); + assert(a.in1 == r.begin() + 3); } - { // Large-scale test: swapping 100 elements between two different containers - const int N = 100; - std::array a; - std::vector b(N + 2, 42); 
- b.front() = 1; - b.back() = -1; - for (int i = 0; i < N; ++i) - a[i] = i * i + 1; - std::same_as auto r = std::ranges::swap_ranges( - Iter1(a.data()), - sentinel_wrapper(Iter1(a.data() + N)), - Iter2(b.data() + 1), - sentinel_wrapper(Iter2(b.data() + b.size()))); - assert(base(r.in1) == a.data() + N); - assert(base(r.in2) == b.data() + N + 1); - assert(b.front() == 1); // Ensure that the unswapped portion remains unchanged - assert(b.back() == -1); - for (int i = 0; i < N; ++i) { - assert(a[i] == 42); - assert(b[i + 1] == i * i + 1); - } + { + std::array r = {1, 2, 3}; + using Expected = std::ranges::swap_ranges_result::iterator>; + std::same_as auto b = std::ranges::swap_ranges(std::array{4, 5, 6}, r); + assert((r == std::array{4, 5, 6})); + assert(b.in2 == r.begin() + 3); } } @@ -84,97 +152,11 @@ constexpr void test_vector_bool() { } constexpr bool test() { - { // Validate swapping ranges directly - std::array r1 = {1, 2, 3}; - std::array r2 = {4, 5, 6}; - - std::same_as::iterator, std::array::iterator>> auto r = - std::ranges::swap_ranges(r1, r2); - assert(r.in1 == r1.end()); - assert(r.in2 == r2.end()); - assert((r1 == std::array{4, 5, 6})); - assert((r2 == std::array{1, 2, 3})); - } - - { // Validate swapping ranges using iterator and sentinels - int i[3] = {1, 2, 3}; - int j[3] = {4, 5, 6}; - using It = cpp17_input_iterator; - using Sent = sentinel_wrapper; - using Expected = std::ranges::swap_ranges_result; - std::same_as auto r = std::ranges::swap_ranges(It(i), Sent(It(i + 3)), It(j), Sent(It(j + 3))); - assert(base(r.in1) == i + 3); - assert(base(r.in2) == j + 3); - assert(std::ranges::equal(i, std::array{4, 5, 6})); - assert(std::ranges::equal(j, std::array{1, 2, 3})); - } - - { // Validate swapping ranges of different lengths - using Expected = std::ranges::swap_ranges_result; - int i[3] = {1, 2, 3}; - int j[1] = {4}; - std::same_as auto r = std::ranges::swap_ranges(i, i + 3, j, j + 1); - assert(r.in1 == i + 1); - assert(r.in2 == j + 1); - 
assert(std::ranges::equal(i, std::array{4, 2, 3})); - assert(std::ranges::equal(j, std::array{1})); - std::same_as auto r2 = std::ranges::swap_ranges(i, j); - assert(r2.in1 == i + 1); - assert(r2.in2 == j + 1); - assert(std::ranges::equal(i, std::array{1, 2, 3})); - assert(std::ranges::equal(j, std::array{4})); - std::same_as auto r3 = std::ranges::swap_ranges(j, j + 1, i, i + 3); - assert(r3.in1 == j + 1); - assert(r3.in2 == i + 1); - assert(std::ranges::equal(i, std::array{4, 2, 3})); - assert(std::ranges::equal(j, std::array{1})); - std::same_as auto r4 = std::ranges::swap_ranges(j, i); - assert(r4.in1 == j + 1); - assert(r4.in2 == i + 1); - assert(std::ranges::equal(i, std::array{1, 2, 3})); - assert(std::ranges::equal(j, std::array{4})); - } - - { // Validate swapping when one or both are borrowed input ranges (views) - { - int r1[] = {1, 2, 3}; - int r2[] = {4, 5, 6}; - std::ranges::swap_ranges(std::views::all(r1), r2); - assert(std::ranges::equal(r1, std::array{4, 5, 6})); - assert(std::ranges::equal(r2, std::array{1, 2, 3})); - } - { - int r1[] = {1, 2, 3}; - int r2[] = {4, 5, 6}; - std::ranges::swap_ranges(r1, std::views::all(r2)); - assert(std::ranges::equal(r1, std::array{4, 5, 6})); - assert(std::ranges::equal(r2, std::array{1, 2, 3})); - } - { - int r1[] = {1, 2, 3}; - int r2[] = {4, 5, 6}; - std::ranges::swap_ranges(std::views::all(r1), std::views::all(r2)); - assert(std::ranges::equal(r1, std::array{4, 5, 6})); - assert(std::ranges::equal(r2, std::array{1, 2, 3})); - } - } - - { // Validate swapping involving rvalue ranges - { - using Expected = std::ranges::swap_ranges_result::iterator, std::ranges::dangling>; - std::array r = {1, 2, 3}; - std::same_as auto a = std::ranges::swap_ranges(r, std::array{4, 5, 6}); - assert((r == std::array{4, 5, 6})); - assert(a.in1 == r.begin() + 3); - } - { - std::array r = {1, 2, 3}; - using Expected = std::ranges::swap_ranges_result::iterator>; - std::same_as auto b = std::ranges::swap_ranges(std::array{4, 5, 6}, 
r); - assert((r == std::array{4, 5, 6})); - assert(b.in2 == r.begin() + 3); - } - } + test_range(); + test_sentinel(); + test_different_lengths(); + test_borrowed_input_range(); + test_rval_range(); types::for_each(types::cpp20_input_iterator_list(), []() { types::for_each(types::cpp20_input_iterator_list(), []() { diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp index 84ebedf213f5b..01cd33150e236 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.swap/swap_ranges.pass.cpp @@ -35,31 +35,12 @@ struct TestPtr { struct TestImpl { template TEST_CONSTEXPR_CXX20 void operator()() { - { // Basic test case: swapping three elements between two arrays - int a[] = {1, 2, 3}; - int b[] = {4, 5, 6}; - Iter2 r = std::swap_ranges(Iter1(a), Iter1(a + 3), Iter2(b)); - assert(base(r) == b + 3); - assert(a[0] == 4 && a[1] == 5 && a[2] == 6); - assert(b[0] == 1 && b[1] == 2 && b[2] == 3); - } - { // Large-scale test: swapping 100 elements between two different containers - const int N = 100; - std::array a; - std::vector b(N + 2, 42); - b.front() = 1; - b.back() = -1; - for (int i = 0; i < N; ++i) - a[i] = i * i + 1; - Iter2 r = std::swap_ranges(Iter1(a.data()), Iter1(a.data() + N), Iter2(b.data() + 1)); - assert(base(r) == b.data() + N + 1); - assert(b.front() == 1); // Ensure that the unswapped portion remains unchanged - assert(b.back() == -1); - for (int i = 0; i < N; ++i) { - assert(a[i] == 42); - assert(b[i + 1] == i * i + 1); - } - } + int a[] = {1, 2, 3}; + int b[] = {4, 5, 6}; + Iter2 r = std::swap_ranges(Iter1(a), Iter1(a + 3), Iter2(b)); + assert(base(r) == b + 3); + assert(a[0] == 4 && a[1] == 5 && a[2] == 6); + assert(b[0] == 1 && b[1] == 2 && b[2] == 3); } }; }; diff --git 
a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp index 859532d4b79c7..02cc84c288828 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.equal/equal.pass.cpp @@ -20,8 +20,6 @@ // We test the cartesian product, so we sometimes compare differently signed types // ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings): -Wno-sign-compare -// ADDITIONAL_COMPILE_FLAGS(character-conversion-warnings): -Wno-character-conversion - // MSVC warning C4242: 'argument': conversion from 'int' to 'const _Ty', possible loss of data // MSVC warning C4244: 'argument': conversion from 'wchar_t' to 'const _Ty', possible loss of data // MSVC warning C4389: '==': signed/unsigned mismatch diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp index 989edcb3f6eed..3aaeb9c2f345f 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find/find.pass.cpp @@ -8,7 +8,6 @@ // ADDITIONAL_COMPILE_FLAGS(gcc): -Wno-bool-compare // ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings): -Wno-sign-compare -// ADDITIONAL_COMPILE_FLAGS(character-conversion-warnings): -Wno-character-conversion // MSVC warning C4245: conversion from 'int' to 'wchar_t', signed/unsigned mismatch // MSVC warning C4305: truncation from 'int' to 'bool' // MSVC warning C4310: cast truncates constant value diff --git a/libcxx/test/std/localization/codecvt_unicode.pass.cpp b/libcxx/test/std/localization/codecvt_unicode.pass.cpp index fc5625d8ce4e9..e54c0c2a4610a 100644 --- a/libcxx/test/std/localization/codecvt_unicode.pass.cpp +++ b/libcxx/test/std/localization/codecvt_unicode.pass.cpp @@ -484,7 +484,7 @@ template void utf8_to_utf16_in_ok(const std::codecvt& cvt) { // UTF-8 string of 1-byte CP, 2-byte CP, 
3-byte CP and 4-byte CP const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA"; - const InternT expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; + const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; static_assert(array_size(input) == 11, ""); static_assert(array_size(expected) == 6, ""); @@ -549,7 +549,7 @@ template void utf8_to_utf16_in_partial(const std::codecvt& cvt) { // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA"; - const InternT expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; + const char16_t expected[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; static_assert(array_size(input) == 11, ""); static_assert(array_size(expected) == 6, ""); @@ -618,7 +618,7 @@ template void utf8_to_utf16_in_error(const std::codecvt& cvt) { // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP const unsigned char input[] = "b\u0448\uD700\U0010AAAA"; - const InternT expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0}; + const char16_t expected[] = {'b', 0x0448, 0xD700, 0xDBEA, 0xDEAA, 0}; static_assert(array_size(input) == 11, ""); static_assert(array_size(expected) == 6, ""); @@ -765,7 +765,7 @@ void utf8_to_utf16_in(const std::codecvt& cvt) { template void utf16_to_utf8_out_ok(const std::codecvt& cvt) { // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP - const InternT input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; + const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA"; static_assert(array_size(input) == 6, ""); static_assert(array_size(expected) == 11, ""); @@ -801,7 +801,7 @@ void utf16_to_utf8_out_ok(const std::codecvt& cvt) template void utf16_to_utf8_out_partial(const std::codecvt& cvt) { // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP - const InternT input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; + const char16_t input[] = {'b', 0x0448, 0xAAAA, 
0xDBEA, 0xDEAA, 0}; const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA"; static_assert(array_size(input) == 6, ""); static_assert(array_size(expected) == 11, ""); @@ -860,7 +860,7 @@ void utf16_to_utf8_out_partial(const std::codecvt& template void utf16_to_utf8_out_error(const std::codecvt& cvt) { // UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP - const InternT input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; + const char16_t input[] = {'b', 0x0448, 0xAAAA, 0xDBEA, 0xDEAA, 0}; const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA"; static_assert(array_size(input) == 6, ""); static_assert(array_size(expected) == 11, ""); diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp index 86a08ee32cb45..c34e864220e12 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_in.pass.cpp @@ -33,6 +33,6 @@ int main(int, char**) { assert(from_next - from == 9); assert(to_next - to == 9); for (unsigned i = 0; i < 9; ++i) - assert(to[i] == static_cast(from[i])); + assert(to[i] == from[i]); return 0; } diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp index d5c0c3cf31244..c39e64de7a59f 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp +++ 
b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char16_t_char8_t_out.pass.cpp @@ -34,6 +34,6 @@ int main(int, char**) { assert(from_next - from == 9); assert(to_next - to == 9); for (unsigned i = 0; i < 9; ++i) - assert(static_cast(to[i]) == from[i]); + assert(to[i] == from[i]); return 0; } diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp index e6af982c10e99..e848f8a10912e 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_in.pass.cpp @@ -33,6 +33,6 @@ int main(int, char**) { assert(from_next - from == 9); assert(to_next - to == 9); for (unsigned i = 0; i < 9; ++i) - assert(to[i] == static_cast(from[i])); + assert(to[i] == from[i]); return 0; } diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp index 3cf46a436e2e7..7a31c9ef10558 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.codecvt/locale.codecvt.members/char32_t_char8_t_out.pass.cpp @@ -34,6 +34,6 @@ int main(int, char**) { assert(from_next - from == 9); assert(to_next - to == 9); for (unsigned i = 0; i < 9; ++i) - assert(static_cast(to[i]) == from[i]); + assert(to[i] == from[i]); return 0; } diff --git 
a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign2.pass.cpp b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign2.pass.cpp index 971fcd68cc8e6..e3bc9c3c100d4 100644 --- a/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign2.pass.cpp +++ b/libcxx/test/std/strings/char.traits/char.traits.specializations/char.traits.specializations.char8_t/assign2.pass.cpp @@ -19,9 +19,9 @@ #ifndef TEST_HAS_NO_CHAR8_T constexpr bool test_constexpr() { - char8_t c = u8'1'; + char8_t c = u'1'; std::char_traits::assign(c, u'a'); - return c == u8'a'; + return c == u'a'; } int main(int, char**) { diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index d8b23be9a0323..5c809fafe2cf5 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -147,7 +147,6 @@ function generate-cmake() { generate-cmake-base \ -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ -DLIBCXX_CXX_ABI=libcxxabi \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ "${@}" } diff --git a/libcxx/utils/gdb/libcxx/printers.py b/libcxx/utils/gdb/libcxx/printers.py index e3d5d87aca325..31c27a1959cb2 100644 --- a/libcxx/utils/gdb/libcxx/printers.py +++ b/libcxx/utils/gdb/libcxx/printers.py @@ -673,7 +673,7 @@ def display_hint(self): return "map" def _get_key_value(self, node): - key_value = node.cast(self.util.cast_type).dereference()["__value_"] + key_value = _cc_field(node.cast(self.util.cast_type).dereference()) return [key_value["first"], key_value["second"]] @@ -738,7 +738,7 @@ def __init__(self, val): self._initialize(val["__i_"], _remove_generics(_prettify_typename(val.type))) def _get_node_value(self, node): - return node["__value_"] + return _cc_field(node) class SetIteratorPrinter(AbstractRBTreeIteratorPrinter): diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index 
74746e37d3bc4..10fc4b0afde6b 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -144,10 +144,6 @@ def _mingwSupportsModules(cfg): when=lambda cfg: hasCompileFlag(cfg, "-Wuser-defined-warnings"), actions=[AddCompileFlag("-Wuser-defined-warnings")], ), - Feature( - name="character-conversion-warnings", - when=lambda cfg: hasCompileFlag(cfg, "-Wcharacter-conversion"), - ), # Tests to validate whether the compiler has a way to set the maximum number # of steps during constant evaluation. Since the flag differs per compiler # store the "valid" flag as a feature. This allows passing the proper compile diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h index f45b754384ef9..2c5f6415e5d4b 100644 --- a/lld/COFF/COFFLinkerContext.h +++ b/lld/COFF/COFFLinkerContext.h @@ -50,14 +50,6 @@ class COFFLinkerContext : public CommonLinkerContext { f(symtab); } - // Invoke the specified callback for each active symbol table, - // skipping the native symbol table on pure ARM64EC targets. - void forEachActiveSymtab(std::function f) { - if (symtab.ctx.config.machine == ARM64X) - f(*hybridSymtab); - f(symtab); - } - std::vector objFileInstances; std::map pdbInputFileInstances; std::vector importFileInstances; diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 01752cdc6a9da..ff2bc40932c04 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -580,7 +580,7 @@ void SectionChunk::getBaserels(std::vector *res) { // to match the value in the EC load config, which is expected to be // a relocatable pointer to the __chpe_metadata symbol. 
COFFLinkerContext &ctx = file->symtab.ctx; - if (ctx.config.machine == ARM64X && ctx.hybridSymtab->loadConfigSym && + if (ctx.hybridSymtab && ctx.hybridSymtab->loadConfigSym && ctx.hybridSymtab->loadConfigSym->getChunk() == this && ctx.symtab.loadConfigSym && ctx.hybridSymtab->loadConfigSize >= diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index c327da28ce138..0440507b71756 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -560,8 +560,7 @@ class TailMergeChunkARM64 : public NonSectionCodeChunk { memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64)); applyArm64Addr(buf + 44, desc->getRVA(), rva + 44, 12); applyArm64Imm(buf + 48, desc->getRVA() & 0xfff, 0); - if (helper) - applyArm64Branch26(buf + 52, helper->getRVA() - rva - 52); + applyArm64Branch26(buf + 52, helper->getRVA() - rva - 52); } Chunk *desc = nullptr; @@ -782,7 +781,6 @@ void IdataContents::create(COFFLinkerContext &ctx) { // ordinal values to the table. size_t base = lookups.size(); Chunk *lookupsTerminator = nullptr, *addressesTerminator = nullptr; - uint32_t nativeOnly = 0; for (DefinedImportData *s : syms) { uint16_t ord = s->getOrdinal(); HintNameChunk *hintChunk = nullptr; @@ -808,8 +806,8 @@ void IdataContents::create(COFFLinkerContext &ctx) { // the native terminator, they will be ignored in the native view. // In the EC view, they should act as terminators, so emit ZEROFILL // relocations overriding them. - if (ctx.config.machine == ARM64X && !lookupsTerminator && - s->file->isEC() && !s->file->hybridFile) { + if (ctx.hybridSymtab && !lookupsTerminator && s->file->isEC() && + !s->file->hybridFile) { lookupsTerminator = lookupsChunk; addressesTerminator = addressesChunk; lookupsChunk = make(ctx); @@ -843,7 +841,6 @@ void IdataContents::create(COFFLinkerContext &ctx) { // Fill the auxiliary IAT with null chunks for native-only imports. auxIat.push_back(make(ctx)); auxIatCopy.push_back(make(ctx)); - ++nativeOnly; } } // Terminate with null values. 
@@ -865,15 +862,18 @@ void IdataContents::create(COFFLinkerContext &ctx) { // Create the import table header. dllNames.push_back(make(syms[0]->getDLLName())); auto *dir = make(dllNames.back()); + dir->lookupTab = lookups[base]; + dir->addressTab = addresses[base]; + dirs.push_back(dir); - if (ctx.hybridSymtab && nativeOnly) { - if (ctx.config.machine != ARM64X) - // On pure ARM64EC targets, skip native-only imports in the import - // directory. - base += nativeOnly; - else if (nativeOnly) { - // If native-only imports exist, they will appear as a prefix to all - // imports. Emit ARM64X relocations to skip them in the EC view. + if (ctx.hybridSymtab) { + // If native-only imports exist, they will appear as a prefix to all + // imports. Emit ARM64X relocations to skip them in the EC view. + uint32_t nativeOnly = + llvm::find_if(syms, + [](DefinedImportData *s) { return s->file->isEC(); }) - + syms.begin(); + if (nativeOnly) { ctx.dynamicRelocs->add( IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0, Arm64XRelocVal( @@ -886,10 +886,6 @@ void IdataContents::create(COFFLinkerContext &ctx) { nativeOnly * sizeof(uint64_t)); } } - - dir->lookupTab = lookups[base]; - dir->addressTab = addresses[base]; - dirs.push_back(dir); } // Add null terminator. dirs.push_back(make(sizeof(ImportDirectoryTableEntry), 4)); @@ -926,25 +922,21 @@ void DelayLoadContents::create() { size_t base = addresses.size(); ctx.forEachSymtab([&](SymbolTable &symtab) { - if (symtab.isEC()) { - if (ctx.config.machine == ARM64X) { - // For hybrid images, emit null-terminated native import entries - // followed by null-terminated EC entries. If a view is missing - // imports for a given module, only terminators are emitted. Emit - // ARM64X relocations to skip native entries in the EC view. 
- ctx.dynamicRelocs->add( - IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0, - Arm64XRelocVal(dir, offsetof(delay_import_directory_table_entry, - DelayImportAddressTable)), - (addresses.size() - base) * sizeof(uint64_t)); - ctx.dynamicRelocs->add( - IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0, - Arm64XRelocVal(dir, offsetof(delay_import_directory_table_entry, - DelayImportNameTable)), - (addresses.size() - base) * sizeof(uint64_t)); - } else { - base = addresses.size(); - } + if (ctx.hybridSymtab && symtab.isEC()) { + // For hybrid images, emit null-terminated native import entries + // followed by null-terminated EC entries. If a view is missing imports + // for a given module, only terminators are emitted. Emit ARM64X + // relocations to skip native entries in the EC view. + ctx.dynamicRelocs->add( + IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0, + Arm64XRelocVal(dir, offsetof(delay_import_directory_table_entry, + DelayImportAddressTable)), + (addresses.size() - base) * sizeof(uint64_t)); + ctx.dynamicRelocs->add( + IMAGE_DVRT_ARM64X_FIXUP_TYPE_DELTA, 0, + Arm64XRelocVal(dir, offsetof(delay_import_directory_table_entry, + DelayImportNameTable)), + (addresses.size() - base) * sizeof(uint64_t)); } Chunk *tm = nullptr; @@ -989,7 +981,7 @@ void DelayLoadContents::create() { chunk = make(s->file); auxIatCopy.push_back(chunk); s->file->auxImpCopySym->setLocation(chunk); - } else if (ctx.config.machine == ARM64X) { + } else if (ctx.hybridSymtab) { // Fill the auxiliary IAT with null chunks for native imports. auxIat.push_back(make(ctx)); auxIatCopy.push_back(make(ctx)); @@ -1003,10 +995,6 @@ void DelayLoadContents::create() { symtab.addSynthetic(tmName, tm); } - // Skip terminators on pure ARM64EC target if there are no native imports. - if (!tm && !symtab.isEC() && ctx.config.machine != ARM64X) - return; - // Terminate with null values. 
addresses.push_back(make(ctx, 8)); names.push_back(make(ctx, 8)); @@ -1036,7 +1024,7 @@ void DelayLoadContents::create() { } Chunk *DelayLoadContents::newTailMergeChunk(SymbolTable &symtab, Chunk *dir) { - auto helper = cast_or_null(symtab.delayLoadHelper); + auto helper = cast(symtab.delayLoadHelper); switch (symtab.machine) { case AMD64: case ARM64EC: diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 13e716d9958a0..4c296da35d667 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -190,6 +190,7 @@ static bool compatibleMachineType(COFFLinkerContext &ctx, MachineTypes mt) { case ARM64: return mt == ARM64 || mt == ARM64X; case ARM64EC: + return isArm64EC(mt) || mt == AMD64; case ARM64X: return isAnyArm64(mt) || mt == AMD64; case IMAGE_FILE_MACHINE_UNKNOWN: @@ -491,12 +492,6 @@ void LinkerDriver::parseDirectives(InputFile *file) { case OPT_alternatename: file->symtab.parseAlternateName(arg->getValue()); break; - case OPT_arm64xsameaddress: - if (!file->symtab.isEC()) - Warn(ctx) << arg->getSpelling() - << " is not allowed in non-ARM64EC files (" << toString(file) - << ")"; - break; case OPT_defaultlib: if (std::optional path = findLibIfNew(arg->getValue())) enqueuePath(*path, false, false); @@ -504,7 +499,7 @@ void LinkerDriver::parseDirectives(InputFile *file) { case OPT_entry: if (!arg->getValue()[0]) Fatal(ctx) << "missing entry point symbol name"; - ctx.forEachActiveSymtab([&](SymbolTable &symtab) { + ctx.forEachSymtab([&](SymbolTable &symtab) { symtab.entry = symtab.addGCRoot(symtab.mangle(arg->getValue()), true); }); break; @@ -662,13 +657,9 @@ void LinkerDriver::setMachine(MachineTypes machine) { ctx.config.machine = machine; - if (!isArm64EC(machine)) { + if (machine != ARM64X) { ctx.symtab.machine = machine; } else { - // Set up a hybrid symbol table on ARM64EC/ARM64X. This is primarily useful - // on ARM64X, where both the native and EC symbol tables are meaningful. 
- // However, since ARM64EC can include native object files, we also need to - // support a hybrid symbol table there. ctx.symtab.machine = ARM64EC; ctx.hybridSymtab.emplace(ctx, ARM64); } @@ -988,7 +979,7 @@ void LinkerDriver::createImportLibrary(bool asLib) { }; getExports(ctx.symtab, exports); - if (ctx.config.machine == ARM64X) + if (ctx.hybridSymtab) getExports(*ctx.hybridSymtab, nativeExports); std::string libName = getImportName(asLib); @@ -1392,13 +1383,13 @@ void LinkerDriver::maybeExportMinGWSymbols(const opt::InputArgList &args) { return; if (ctx.symtab.hadExplicitExports || - (ctx.config.machine == ARM64X && ctx.hybridSymtab->hadExplicitExports)) + (ctx.hybridSymtab && ctx.hybridSymtab->hadExplicitExports)) return; if (args.hasArg(OPT_exclude_all_symbols)) return; } - ctx.forEachActiveSymtab([&](SymbolTable &symtab) { + ctx.forEachSymtab([&](SymbolTable &symtab) { AutoExporter exporter(symtab, excludedSymbols); for (auto *arg : args.filtered(OPT_wholearchive_file)) @@ -2314,7 +2305,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (auto *arg = args.getLastArg(OPT_deffile)) { // parseModuleDefs mutates Config object. ctx.symtab.parseModuleDefs(arg->getValue()); - if (ctx.config.machine == ARM64X) { + if (ctx.hybridSymtab) { // MSVC ignores the /defArm64Native argument on non-ARM64X targets. // It is also ignored if the /def option is not specified. 
if (auto *arg = args.getLastArg(OPT_defarm64native)) @@ -2341,7 +2332,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { } // Handle /entry and /dll - ctx.forEachActiveSymtab([&](SymbolTable &symtab) { + ctx.forEachSymtab([&](SymbolTable &symtab) { llvm::TimeTraceScope timeScope("Entry point"); if (auto *arg = args.getLastArg(OPT_entry)) { if (!arg->getValue()[0]) @@ -2373,7 +2364,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { llvm::TimeTraceScope timeScope("Delay load"); for (auto *arg : args.filtered(OPT_delayload)) { config->delayLoads.insert(StringRef(arg->getValue()).lower()); - ctx.forEachActiveSymtab([&](SymbolTable &symtab) { + ctx.forEachSymtab([&](SymbolTable &symtab) { if (symtab.machine == I386) { symtab.delayLoadHelper = symtab.addGCRoot("___delayLoadHelper2@8"); } else { @@ -2547,9 +2538,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { u->setWeakAlias(symtab.addUndefined(to)); } } - }); - ctx.forEachActiveSymtab([&](SymbolTable &symtab) { // If any inputs are bitcode files, the LTO code generator may create // references to library functions that are not explicit in the bitcode // file's symbol table. If any of those library functions are defined in @@ -2579,7 +2568,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // Handle /includeglob for (StringRef pat : args::getStrings(args, OPT_incl_glob)) - ctx.forEachActiveSymtab( + ctx.forEachSymtab( [&](SymbolTable &symtab) { symtab.addUndefinedGlob(pat); }); // Create wrapped symbols for -wrap option. @@ -2696,12 +2685,12 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // need to create a .lib file. In MinGW mode, we only do that when the // -implib option is given explicitly, for compatibility with GNU ld. 
if (config->dll || !ctx.symtab.exports.empty() || - (ctx.config.machine == ARM64X && !ctx.hybridSymtab->exports.empty())) { + (ctx.hybridSymtab && !ctx.hybridSymtab->exports.empty())) { llvm::TimeTraceScope timeScope("Create .lib exports"); - ctx.forEachActiveSymtab([](SymbolTable &symtab) { symtab.fixupExports(); }); + ctx.forEachSymtab([](SymbolTable &symtab) { symtab.fixupExports(); }); if (!config->noimplib && (!config->mingw || !config->implib.empty())) createImportLibrary(/*asLib=*/false); - ctx.forEachActiveSymtab( + ctx.forEachSymtab( [](SymbolTable &symtab) { symtab.assignExportOrdinals(); }); } @@ -2767,8 +2756,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (ctx.symtab.isEC()) ctx.symtab.initializeECThunks(); - ctx.forEachActiveSymtab( - [](SymbolTable &symtab) { symtab.initializeLoadConfig(); }); + ctx.forEachSymtab([](SymbolTable &symtab) { symtab.initializeLoadConfig(); }); // Identify unreferenced COMDAT sections. if (config->doGC) { diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index e10b6419b5ad5..7fb42bb681939 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -137,8 +137,10 @@ void ArchiveFile::parse() { ctx.symtab.addLazyArchive(this, sym); // Read both EC and native symbols on ARM64X. + if (!ctx.hybridSymtab) + return; archiveSymtab = &*ctx.hybridSymtab; - } else { + } else if (ctx.hybridSymtab) { // If the ECSYMBOLS section is missing in the archive, the archive could // be either a native-only ARM64 or x86_64 archive. 
Check the machine type // of the object containing a symbol to determine which symbol table to diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index a887d7d351e18..4e401a5fd1d6d 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -356,4 +356,3 @@ def tlbid : P_priv<"tlbid">; def tlbout : P_priv<"tlbout">; def verbose_all : P_priv<"verbose">; def guardsym : P_priv<"guardsym">; -def arm64xsameaddress : P_priv<"arm64xsameaddress">; diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index d6f771284aa83..8fb0ee4e890d6 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -551,7 +551,7 @@ void SymbolTable::initializeLoadConfig() { Warn(ctx) << "EC version of '_load_config_used' is missing"; return; } - if (ctx.config.machine == ARM64X) { + if (ctx.hybridSymtab) { Warn(ctx) << "native version of '_load_config_used' is missing for " "ARM64X target"; return; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index db6133e20a037..f3cf4902e6ecf 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1374,7 +1374,7 @@ void Writer::createExportTable() { } } } - ctx.forEachActiveSymtab([&](SymbolTable &symtab) { + ctx.forEachSymtab([&](SymbolTable &symtab) { if (symtab.edataStart) { if (symtab.hadExplicitExports) Warn(ctx) << "literal .edata sections override exports"; @@ -1776,8 +1776,7 @@ template void Writer::writeHeader() { assert(coffHeaderOffset == buf - buffer->getBufferStart()); auto *coff = reinterpret_cast(buf); buf += sizeof(*coff); - SymbolTable &symtab = - ctx.config.machine == ARM64X ? *ctx.hybridSymtab : ctx.symtab; + SymbolTable &symtab = ctx.hybridSymtab ? *ctx.hybridSymtab : ctx.symtab; coff->Machine = symtab.isEC() ? AMD64 : symtab.machine; coff->NumberOfSections = ctx.outputSections.size(); coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; @@ -2434,7 +2433,7 @@ void Writer::setECSymbols() { return a.first->getRVA() < b.first->getRVA(); }); - ChunkRange &chpePdata = ctx.config.machine == ARM64X ? 
hybridPdata : pdata; + ChunkRange &chpePdata = ctx.hybridSymtab ? hybridPdata : pdata; Symbol *rfeTableSym = ctx.symtab.findUnderscore("__arm64x_extra_rfe_table"); replaceSymbol(rfeTableSym, "__arm64x_extra_rfe_table", chpePdata.first); @@ -2479,7 +2478,7 @@ void Writer::setECSymbols() { delayIdata.getAuxIatCopy().empty() ? nullptr : delayIdata.getAuxIatCopy().front()); - if (ctx.config.machine == ARM64X) { + if (ctx.hybridSymtab) { // For the hybrid image, set the alternate entry point to the EC entry // point. In the hybrid view, it is swapped to the native entry point // using ARM64X relocations. @@ -2869,7 +2868,7 @@ void Writer::fixTlsAlignment() { } void Writer::prepareLoadConfig() { - ctx.forEachActiveSymtab([&](SymbolTable &symtab) { + ctx.forEachSymtab([&](SymbolTable &symtab) { if (!symtab.loadConfigSym) return; @@ -2929,7 +2928,7 @@ void Writer::prepareLoadConfig(SymbolTable &symtab, T *loadConfig) { IF_CONTAINS(CHPEMetadataPointer) { // On ARM64X, only the EC version of the load config contains // CHPEMetadataPointer. Copy its value to the native load config. - if (ctx.config.machine == ARM64X && !symtab.isEC() && + if (ctx.hybridSymtab && !symtab.isEC() && ctx.symtab.loadConfigSize >= offsetof(T, CHPEMetadataPointer) + sizeof(T::CHPEMetadataPointer)) { OutputSection *sec = diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index e45dd4d354afb..e667fdc0633c5 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -1489,7 +1489,7 @@ template void elf::writeARMCmseImportLib(Ctx &ctx) { const uint64_t fileSize = sectionHeaderOff + shnum * sizeof(typename ELFT::Shdr); const unsigned flags = - ctx.arg.mmapOutputFile ? (unsigned)FileOutputBuffer::F_mmap : 0; + ctx.arg.mmapOutputFile ? 
0 : (unsigned)FileOutputBuffer::F_no_mmap; unlinkAsync(ctx.arg.cmseOutputLib); Expected> bufferOrErr = FileOutputBuffer::create(ctx.arg.cmseOutputLib, fileSize, flags); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 76a37b706c5fa..e8acdbefa32bb 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1421,7 +1421,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) { ctx.arg.mergeArmExidx = args.hasFlag(OPT_merge_exidx_entries, OPT_no_merge_exidx_entries, true); ctx.arg.mmapOutputFile = - args.hasFlag(OPT_mmap_output_file, OPT_no_mmap_output_file, false); + args.hasFlag(OPT_mmap_output_file, OPT_no_mmap_output_file, true); ctx.arg.nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false); ctx.arg.noinhibitExec = args.hasArg(OPT_noinhibit_exec); ctx.arg.nostdlib = args.hasArg(OPT_nostdlib); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 6a0552e808c7b..e2aebff20e174 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -2908,8 +2908,8 @@ template void Writer::openFile() { unsigned flags = 0; if (!ctx.arg.relocatable) flags |= FileOutputBuffer::F_executable; - if (ctx.arg.mmapOutputFile) - flags |= FileOutputBuffer::F_mmap; + if (!ctx.arg.mmapOutputFile) + flags |= FileOutputBuffer::F_no_mmap; Expected> bufferOrErr = FileOutputBuffer::create(ctx.arg.outputFile, fileSize, flags); diff --git a/lld/docs/ELF/warn_backrefs.rst b/lld/docs/ELF/warn_backrefs.rst index bb9d86ce685c0..fac2145cc0c0e 100644 --- a/lld/docs/ELF/warn_backrefs.rst +++ b/lld/docs/ELF/warn_backrefs.rst @@ -11,8 +11,8 @@ so far. When encountering an archive or an object file surrounded by symbol definitions; this may result in input files being loaded, updating the set of undefined symbol references. When all resolving definitions have been loaded from the archive, the linker moves on the next file and will not return -to it. This means that if an input file to the right of an archive cannot have -an undefined symbol resolved by an archive to the left of it. 
For example: +to it. This means that if an input file to the right of a archive cannot have +an undefined symbol resolved by a archive to the left of it. For example: ld def.a ref.o diff --git a/lld/docs/windows_support.rst b/lld/docs/windows_support.rst index 38688a9629227..e4640b4a5259a 100644 --- a/lld/docs/windows_support.rst +++ b/lld/docs/windows_support.rst @@ -86,7 +86,7 @@ MSBuild.exe had been shipped as a component of the .NET framework, but since 2013 it's part of Visual Studio. You can find it at "C:\\Program Files (x86)\\msbuild". -You can build LLD as a 64-bit application. To do that, open VS2013 x64 command +You can build LLD as a 64 bit application. To do that, open VS2013 x64 command prompt and run cmake for "Visual Studio 12 Win64" target. Using Ninja diff --git a/lld/test/COFF/arm64ec-entry-mangle.test b/lld/test/COFF/arm64ec-entry-mangle.test index 1f029077ba51d..6db16ef218dc8 100644 --- a/lld/test/COFF/arm64ec-entry-mangle.test +++ b/lld/test/COFF/arm64ec-entry-mangle.test @@ -97,7 +97,7 @@ RUN: not lld-link -machine:arm64ec -dll -out:test.dll demangled-func.obj loadcon RUN: "-entry:#func" 2>&1 | FileCheck -check-prefix=FUNC-NOT-FOUND %s RUN: not lld-link -machine:arm64ec -dll -out:test.dll demangled-func.obj loadconfig-arm64ec.obj \ RUN: -noentry "-export:#func" 2>&1 | FileCheck -check-prefix=FUNC-NOT-FOUND %s -FUNC-NOT-FOUND: undefined symbol: #func (EC symbol) +FUNC-NOT-FOUND: undefined symbol: #func Verify that the linker recognizes the demangled x86_64 _DllMainCRTStartup. 
RUN: lld-link -machine:arm64ec -dll -out:test.dll x64-dll-main.obj loadconfig-arm64ec.obj diff --git a/lld/test/COFF/arm64ec-hybmp.s b/lld/test/COFF/arm64ec-hybmp.s index 670ee3926ab5c..5fc24d4250704 100644 --- a/lld/test/COFF/arm64ec-hybmp.s +++ b/lld/test/COFF/arm64ec-hybmp.s @@ -62,7 +62,7 @@ thunk: // RUN: llvm-mc -filetype=obj -triple=arm64ec-windows undef-func.s -o undef-func.obj // RUN: not lld-link -machine:arm64ec -dll -noentry -out:test.dll undef-func.obj 2>&1 | FileCheck -check-prefix=UNDEF-FUNC %s -// UNDEF-FUNC: error: undefined symbol: func (EC symbol) +// UNDEF-FUNC: error: undefined symbol: func #--- undef-thunk.s .section .text,"xr",discard,func @@ -79,7 +79,7 @@ func: // RUN: llvm-mc -filetype=obj -triple=arm64ec-windows undef-thunk.s -o undef-thunk.obj // RUN: not lld-link -machine:arm64ec -dll -noentry -out:test.dll undef-thunk.obj 2>&1 | FileCheck -check-prefix=UNDEF-THUNK %s -// UNDEF-THUNK: error: undefined symbol: thunk (EC symbol) +// UNDEF-THUNK: error: undefined symbol: thunk #--- invalid-type.s .section .text,"xr",discard,func diff --git a/lld/test/COFF/arm64ec-lib.test b/lld/test/COFF/arm64ec-lib.test index 1e6fa60209d94..8698a5ceccbe7 100644 --- a/lld/test/COFF/arm64ec-lib.test +++ b/lld/test/COFF/arm64ec-lib.test @@ -29,13 +29,11 @@ RUN: lld-link -machine:arm64ec -dll -noentry -out:test2.dll symref-arm64ec.obj s Verify that both native and EC symbols can be referenced in a hybrid target. RUN: lld-link -machine:arm64x -dll -noentry -out:test3.dll symref-arm64ec.obj nsymref-aarch64.obj sym-arm64ec.lib \ RUN: loadconfig-arm64.obj loadconfig-arm64ec.obj -RUN: lld-link -machine:arm64ec -dll -noentry -out:test3ec.dll symref-arm64ec.obj nsymref-aarch64.obj sym-arm64ec.lib \ -RUN: loadconfig-arm64.obj loadconfig-arm64ec.obj Ensure that an EC symbol is not resolved using a regular symbol map. 
RUN: not lld-link -machine:arm64ec -dll -noentry -out:test-err.dll nsymref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj 2>&1 |\ RUN: FileCheck --check-prefix=ERR %s -ERR: error: undefined symbol: nsym (EC symbol) +ERR: error: undefined symbol: nsym Verify that a library symbol can be referenced, even if its name conflicts with an anti-dependency alias. RUN: lld-link -machine:arm64ec -dll -noentry -out:ref-alias-1.dll ref-alias.obj func.lib loadconfig-arm64ec.obj diff --git a/lld/test/COFF/arm64ec-patchable-thunks.test b/lld/test/COFF/arm64ec-patchable-thunks.test index 593405775ba87..1e1ff1f7f2ee4 100644 --- a/lld/test/COFF/arm64ec-patchable-thunks.test +++ b/lld/test/COFF/arm64ec-patchable-thunks.test @@ -57,7 +57,7 @@ RUN: llvm-readobj --coff-load-config test3.dll | FileCheck -check-prefix=PATCH-C RUN: not lld-link -out:test4.dll -machine:arm64ec test-sec.obj loadconfig-arm64ec.obj -dll -noentry 2>&1 | FileCheck --check-prefix=ERR %s -ERR: error: undefined symbol: EXP+#patchable_func (EC symbol) +ERR: error: undefined symbol: EXP+#patchable_func RUN: lld-link -out:testx.dll -machine:arm64x arm64ec-patchable.obj test-sec.obj \ diff --git a/lld/test/COFF/arm64ec-range-thunks.s b/lld/test/COFF/arm64ec-range-thunks.s index 955e19c30c2ae..dcfa6365b4e3a 100644 --- a/lld/test/COFF/arm64ec-range-thunks.s +++ b/lld/test/COFF/arm64ec-range-thunks.s @@ -79,11 +79,7 @@ # RUN: -out:testx2.dll -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s # VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes -# RUN: lld-link -machine:arm64ec -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64.obj loadconfig-arm64ec.obj \ -# RUN: -out:testx2ec.dll -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s - # RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s -# RUN: llvm-objdump -d testx2ec.dll | FileCheck --check-prefix=DISASMX %s # DISASMX: Disassembly of section .code1: # DISASMX-EMPTY: @@ -130,7 +126,6 @@ # DISASMX-NEXT: 180016010: d61f0200 br x16 
# RUN: llvm-readobj --coff-load-config testx2.dll | FileCheck --check-prefix=LOADCFGX2 %s -# RUN: llvm-readobj --coff-load-config testx2ec.dll | FileCheck --check-prefix=LOADCFGX2 %s # LOADCFGX2: CodeMap [ # LOADCFGX2-NEXT: 0x4000 - 0x4014 ARM64EC diff --git a/lld/test/COFF/arm64ec.test b/lld/test/COFF/arm64ec.test index ea92689250063..75288e97e598d 100644 --- a/lld/test/COFF/arm64ec.test +++ b/lld/test/COFF/arm64ec.test @@ -35,15 +35,14 @@ RUN: llvm-readobj --file-headers test.dll | FileCheck -check-prefix=ARM64X-HEADE RUN: llvm-readobj --hex-dump=.data test.dll | FileCheck -check-prefix=ARM64X-DATA %s ARM64X-DATA: 03030303 01010101 02020202 -RUN: lld-link -out:test.dll -machine:arm64ec x86_64-data-sym.obj arm64-data-sym.obj \ -RUN: arm64ec-data-sym.obj arm64x-resource.obj -dll -noentry -RUN: llvm-readobj --file-headers test.dll | FileCheck -check-prefix=ARM64EC-HEADER %s -RUN: llvm-readobj --hex-dump=.data test.dll | FileCheck -check-prefix=ARM64X-DATA %s - RUN: not lld-link -out:test.dll -machine:arm64 arm64-data-sym.obj arm64ec-data-sym.obj \ RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT1 %s INCOMPAT1: lld-link: error: arm64ec-data-sym.obj: machine type arm64ec conflicts with arm64 +RUN: not lld-link -out:test.dll -machine:arm64ec arm64ec-data-sym.obj arm64-data-sym.obj \ +RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT2 %s +INCOMPAT2: lld-link: error: arm64-data-sym.obj: machine type arm64 conflicts with arm64ec + RUN: not lld-link -out:test.dll -machine:arm64 arm64-data-sym.obj x86_64-data-sym.obj \ RUN: -dll -noentry 2>&1 | FileCheck -check-prefix=INCOMPAT3 %s INCOMPAT3: lld-link: error: x86_64-data-sym.obj: machine type x64 conflicts with arm64 diff --git a/lld/test/COFF/arm64x-altnames.s b/lld/test/COFF/arm64x-altnames.s index db348f85b6b00..43a3f89db9a03 100644 --- a/lld/test/COFF/arm64x-altnames.s +++ b/lld/test/COFF/arm64x-altnames.s @@ -10,8 +10,6 @@ // RUN: not lld-link -out:out.dll -machine:arm64x -dll -noentry test-arm64.obj 
test-arm64ec.obj -alternatename:sym=altsym \ // RUN: 2>&1 | FileCheck --check-prefix=ERR-NATIVE %s -// RUN: not lld-link -out:out.dll -machine:arm64ec -dll -noentry test-arm64.obj test-arm64ec.obj -alternatename:sym=altsym \ -// RUN: 2>&1 | FileCheck --check-prefix=ERR-NATIVE %s // ERR-NATIVE-NOT: test-arm64ec.obj // ERR-NATIVE: lld-link: error: undefined symbol: sym (native symbol) @@ -22,13 +20,9 @@ // RUN: not lld-link -out:out.dll -machine:arm64x -dll -noentry test-arm64.obj test-arm64ec.obj drectve-arm64ec.obj \ // RUN: 2>&1 | FileCheck --check-prefix=ERR-NATIVE %s -// RUN: not lld-link -out:out.dll -machine:arm64ec -dll -noentry test-arm64.obj test-arm64ec.obj drectve-arm64ec.obj \ -// RUN: 2>&1 | FileCheck --check-prefix=ERR-NATIVE %s // RUN: not lld-link -out:out.dll -machine:arm64x -dll -noentry test-arm64.obj test-arm64ec.obj drectve-arm64.obj \ // RUN: 2>&1 | FileCheck --check-prefix=ERR-EC %s -// RUN: not lld-link -out:out.dll -machine:arm64ec -dll -noentry test-arm64.obj test-arm64ec.obj drectve-arm64.obj \ -// RUN: 2>&1 | FileCheck --check-prefix=ERR-EC %s // ERR-EC-NOT: test-arm64.obj // ERR-EC: lld-link: error: undefined symbol: sym (EC symbol) diff --git a/lld/test/COFF/arm64x-buildid.s b/lld/test/COFF/arm64x-buildid.s index 857bcae2c0566..99f50073eaa69 100644 --- a/lld/test/COFF/arm64x-buildid.s +++ b/lld/test/COFF/arm64x-buildid.s @@ -6,9 +6,6 @@ # RUN: llvm-readobj --hex-dump=.test %t.dll | FileCheck %s # CHECK: 0x180003000 3c100000 3c100000 -# RUN: lld-link -machine:arm64ec -dll -noentry %t-arm64.obj %t-arm64ec.obj -debug -build-id -Brepro -out:%t-ec.dll -# RUN: llvm-readobj --hex-dump=.test %t-ec.dll | FileCheck %s - .section .test,"dr" .rva __buildid diff --git a/lld/test/COFF/arm64x-comm.s b/lld/test/COFF/arm64x-comm.s index b950af5b70a44..830e3d3fdaaa1 100644 --- a/lld/test/COFF/arm64x-comm.s +++ b/lld/test/COFF/arm64x-comm.s @@ -8,9 +8,6 @@ // RUN: llvm-readobj --hex-dump=.test %t.dll | FileCheck %s // CHECK: 0x180004000 10200000 18200000 
20200000 28200000 -// RUN: lld-link -machine:arm64ec -lldmingw -dll -noentry -out:%t-ec.dll %t-arm64.obj %t-arm64ec.obj -// RUN: llvm-readobj --hex-dump=.test %t-ec.dll | FileCheck %s - .data .word 0 diff --git a/lld/test/COFF/arm64x-crt-sec.s b/lld/test/COFF/arm64x-crt-sec.s index 45141ec238aea..5be70a1845f12 100644 --- a/lld/test/COFF/arm64x-crt-sec.s +++ b/lld/test/COFF/arm64x-crt-sec.s @@ -17,9 +17,6 @@ // RUN: lld-link -out:out3.dll -machine:arm64x -dll -noentry crt2-amd64.obj crt1-arm64ec.obj crt2-arm64.obj crt1-arm64.obj // RUN: llvm-readobj --hex-dump=.CRT out3.dll | FileCheck %s -// RUN: lld-link -out:out4.dll -machine:arm64ec -dll -noentry crt2-amd64.obj crt1-arm64ec.obj crt2-arm64.obj crt1-arm64.obj -// RUN: llvm-readobj --hex-dump=.CRT out4.dll | FileCheck %s - // CHECK: 0x180002000 01000000 00000000 02000000 00000000 // CHECK-NEXT: 0x180002010 03000000 00000000 11000000 00000000 // CHECK-NEXT: 0x180002020 12000000 00000000 13000000 00000000 diff --git a/lld/test/COFF/arm64x-ctors-sec.s b/lld/test/COFF/arm64x-ctors-sec.s index 3295b3f20b8b2..283d5f045260d 100644 --- a/lld/test/COFF/arm64x-ctors-sec.s +++ b/lld/test/COFF/arm64x-ctors-sec.s @@ -22,10 +22,6 @@ // RUN: ctor2-arm64.obj ctor1-arm64ec.obj ctor2-amd64.obj ctor1-arm64.obj // RUN: llvm-readobj --hex-dump=.rdata --hex-dump=.test out3.dll | FileCheck %s -// RUN: lld-link -out:out4.dll -machine:arm64ec -lldmingw -dll -noentry test-arm64.obj test-arm64ec.obj \ -// RUN: ctor2-arm64.obj ctor1-arm64ec.obj ctor2-amd64.obj ctor1-arm64.obj -// RUN: llvm-readobj --hex-dump=.rdata --hex-dump=.test out4.dll | FileCheck %s - // CHECK: Hex dump of section '.rdata': // CHECK-NEXT: 0x180001000 ffffffff ffffffff 01000000 00000000 // CHECK-NEXT: 0x180001010 02000000 00000000 03000000 00000000 diff --git a/lld/test/COFF/arm64x-guardcf.s b/lld/test/COFF/arm64x-guardcf.s index 9d307bde276e1..750bf0b3862c5 100644 --- a/lld/test/COFF/arm64x-guardcf.s +++ b/lld/test/COFF/arm64x-guardcf.s @@ -16,7 +16,7 @@ // RUN: 
lld-link -dll -noentry -machine:arm64x func-gfids-arm64.obj func-gfids-arm64ec.obj func-amd64.obj -guard:cf -out:out.dll \ // RUN: loadconfig-arm64ec.obj loadconfig-arm64.obj -// RUN: llvm-readobj --coff-load-config out.dll | FileCheck --check-prefixes=LOADCFG,LOADCFGX %s +// RUN: llvm-readobj --coff-load-config out.dll | FileCheck --check-prefix=LOADCFG %s // LOADCFG: LoadConfig [ // LOADCFG: GuardCFFunctionCount: 3 @@ -31,36 +31,28 @@ // LOADCFG-NEXT: 0x180002000 // LOADCFG-NEXT: 0x180003000 // LOADCFG-NEXT: ] -// LOADCFGX: HybridObject { -// LOADCFGX: LoadConfig [ -// LOADCFGX: GuardCFFunctionCount: 3 -// LOADCFG-NEXTX: GuardFlags [ (0x10500) -// LOADCFG-NEXTX: CF_FUNCTION_TABLE_PRESENT (0x400) -// LOADCFG-NEXTX: CF_INSTRUMENTED (0x100) -// LOADCFG-NEXTX: CF_LONGJUMP_TABLE_PRESENT (0x10000) -// LOADCFG-NEXTX: ] -// LOADCFGX: ] -// LOADCFGX: GuardFidTable [ -// LOADCFG-NEXTX: 0x180001000 -// LOADCFG-NEXTX: 0x180002000 -// LOADCFG-NEXTX: 0x180003000 -// LOADCFG-NEXTX: ] -// LOADCFGX: ] - -// RUN: lld-link -dll -noentry -machine:arm64ec func-gfids-arm64.obj func-gfids-arm64ec.obj func-amd64.obj -guard:cf -out:out-ec.dll \ -// RUN: loadconfig-arm64ec.obj loadconfig-arm64.obj -// RUN: llvm-readobj --coff-load-config out-ec.dll | FileCheck --check-prefix=LOADCFG %s +// LOADCFG: HybridObject { +// LOADCFG: LoadConfig [ +// LOADCFG: GuardCFFunctionCount: 3 +// LOADCFG-NEXT: GuardFlags [ (0x10500) +// LOADCFG-NEXT: CF_FUNCTION_TABLE_PRESENT (0x400) +// LOADCFG-NEXT: CF_INSTRUMENTED (0x100) +// LOADCFG-NEXT: CF_LONGJUMP_TABLE_PRESENT (0x10000) +// LOADCFG-NEXT: ] +// LOADCFG: ] +// LOADCFG: GuardFidTable [ +// LOADCFG-NEXT: 0x180001000 +// LOADCFG-NEXT: 0x180002000 +// LOADCFG-NEXT: 0x180003000 +// LOADCFG-NEXT: ] +// LOADCFG: ] // Check that exports from both views are present in CF guard tables. 
// RUN: lld-link -dll -noentry -machine:arm64x func-exp-arm64.obj func-exp-arm64ec.obj -guard:cf -out:out-exp.dll \ // RUN: loadconfig-arm64ec.obj loadconfig-arm64.obj -// RUN: llvm-readobj --coff-load-config out-exp.dll | FileCheck --check-prefixes=LOADCFG,LOADCFGX %s - -// RUN: lld-link -dll -noentry -machine:arm64ec func-exp-arm64.obj func-exp-arm64ec.obj -guard:cf -out:out-exp-ec.dll \ -// RUN: loadconfig-arm64ec.obj loadconfig-arm64.obj -// RUN: llvm-readobj --coff-load-config out-exp-ec.dll | FileCheck --check-prefixes=LOADCFG %s +// RUN: llvm-readobj --coff-load-config out-exp.dll | FileCheck --check-prefix=LOADCFG %s // Check that entry points from both views are present in CF guard tables. diff --git a/lld/test/COFF/arm64x-import.test b/lld/test/COFF/arm64x-import.test index 612b5f9b71de1..7441c71d87710 100644 --- a/lld/test/COFF/arm64x-import.test +++ b/lld/test/COFF/arm64x-import.test @@ -56,7 +56,7 @@ DISASM-12T-NEXT: 180002040: d65f03c0 ret DISASM-12T-NEXT: ... DISASM-12T-NEXT: 180003000: ff 25 fa 0f 00 00 jmpq *0xffa(%rip) # 0x180004000 -RUN: llvm-readobj --coff-imports test-12-thunks.dll | FileCheck --check-prefixes=IMPORTS-12,IMPORTS-12-EC %s +RUN: llvm-readobj --coff-imports test-12-thunks.dll | FileCheck --check-prefix=IMPORTS-12 %s IMPORTS-12: Import { IMPORTS-12-NEXT: Name: test.dll IMPORTS-12-NEXT: ImportLookupTableRVA: 0x5348 @@ -65,13 +65,13 @@ IMPORTS-12-NEXT: Symbol: func1 (0) IMPORTS-12-NEXT: Symbol: func2 (0) IMPORTS-12-NEXT: } IMPORTS-12-NEXT: HybridObject { -IMPORTS-12-EC: Import { -IMPORTS-12-EC-NEXT: Name: test.dll -IMPORTS-12-EC-NEXT: ImportLookupTableRVA: 0x5348 -IMPORTS-12-EC-NEXT: ImportAddressTableRVA: 0x4000 -IMPORTS-12-EC-NEXT: Symbol: func1 (0) -IMPORTS-12-EC-NEXT: Symbol: func2 (0) -IMPORTS-12-EC-NEXT:} +IMPORTS-12: Import { +IMPORTS-12-NEXT: Name: test.dll +IMPORTS-12-NEXT: ImportLookupTableRVA: 0x5348 +IMPORTS-12-NEXT: ImportAddressTableRVA: 0x4000 +IMPORTS-12-NEXT: Symbol: func1 (0) +IMPORTS-12-NEXT: Symbol: func2 (0) 
+IMPORTS-12-NEXT: } IMPORTS-12-NEXT: } RUN: llvm-readobj --hex-dump=.test test-12-thunks.dll | FileCheck --check-prefix=FUNC-12-THUNKS %s @@ -81,13 +81,6 @@ FUNC-12-THUNKS-NEXT: 0x180009010 08600000 08400000 RUN: llvm-readobj --hex-dump=.testa test-12-thunks.dll | FileCheck --check-prefix=FUNC-12-THUNKSA %s FUNC-12-THUNKSA: 0x18000a000 00400000 08400000 00100000 -RUN: lld-link -machine:arm64ec -dll -noentry -out:test-12-thunks-ec.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: icall.obj func12-thunks-arm64ec.obj func12-thunks-arm64.obj imp-arm64ec.lib imp-arm64.lib -RUN: llvm-objdump -d test-12-thunks-ec.dll | FileCheck --check-prefix=DISASM-12T %s -RUN: llvm-readobj --hex-dump=.test test-12-thunks-ec.dll | FileCheck --check-prefix=FUNC-12-THUNKS %s -RUN: llvm-readobj --hex-dump=.testa test-12-thunks-ec.dll | FileCheck --check-prefix=FUNC-12-THUNKSA %s -RUN: llvm-readobj --coff-imports test-12-thunks-ec.dll | FileCheck --check-prefix=IMPORTS-12-EC %s - # If the ordinals of named imports don't match, use the EC value. 
@@ -153,7 +146,7 @@ IMPORTS-ORD2-NEXT: } RUN: lld-link -machine:arm64x -dll -noentry -out:test2.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ RUN: icall.obj func12-arm64ec.obj func123-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports test2.dll | FileCheck --check-prefixes=IMPORTS-123-12,IMPORTS-123-12-EC %s +RUN: llvm-readobj --coff-imports test2.dll | FileCheck --check-prefix=IMPORTS-123-12 %s IMPORTS-123-12: Import { IMPORTS-123-12-NEXT: Name: test.dll IMPORTS-123-12-NEXT: ImportLookupTableRVA: 0x3338 @@ -163,13 +156,13 @@ IMPORTS-123-12-NEXT: Symbol: func1 (0) IMPORTS-123-12-NEXT: Symbol: func2 (0) IMPORTS-123-12-NEXT: } IMPORTS-123-12-NEXT: HybridObject { -IMPORTS-123-12-EC: Import { -IMPORTS-123-12-EC-NEXT: Name: test.dll -IMPORTS-123-12-EC-NEXT: ImportLookupTableRVA: 0x3340 -IMPORTS-123-12-EC-NEXT: ImportAddressTableRVA: 0x2008 -IMPORTS-123-12-EC-NEXT: Symbol: func1 (0) -IMPORTS-123-12-EC-NEXT: Symbol: func2 (0) -IMPORTS-123-12-EC-NEXT:} +IMPORTS-123-12: Import { +IMPORTS-123-12-NEXT: Name: test.dll +IMPORTS-123-12-NEXT: ImportLookupTableRVA: 0x3340 +IMPORTS-123-12-NEXT: ImportAddressTableRVA: 0x2008 +IMPORTS-123-12-NEXT: Symbol: func1 (0) +IMPORTS-123-12-NEXT: Symbol: func2 (0) +IMPORTS-123-12-NEXT: } IMPORTS-123-12-NEXT: } RUN: llvm-readobj --hex-dump=.test test2.dll | FileCheck --check-prefix=TEST-123-12 %s @@ -182,20 +175,13 @@ RUN: llvm-readobj --hex-dump=.rdata test2.dll | FileCheck --check-prefix=TEST-12 TEST-123-12AUX: 0x180004000 00000000 00000000 08100080 01000000 TEST-123-12AUX-NEXT: 0x180004010 1c100080 01000000 00000000 00000000 -RUN: lld-link -machine:arm64ec -dll -noentry -out:test2-ec.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: icall.obj func12-arm64ec.obj func123-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports test2-ec.dll | FileCheck --check-prefix=IMPORTS-123-12-EC %s -RUN: llvm-readobj --hex-dump=.test test2-ec.dll | FileCheck --check-prefix=TEST-123-12 %s -RUN: llvm-readobj --hex-dump=.testa 
test2-ec.dll | FileCheck --check-prefix=TEST-123-12A %s -RUN: llvm-readobj --hex-dump=.rdata test2-ec.dll | FileCheck --check-prefix=TEST-123-12AUX %s - # Link to the imported func1 and func2 from both native and EC code, and func3 from EC code. RUN: lld-link -machine:arm64x -dll -noentry -out:func-12-123.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ RUN: icall.obj func123-arm64ec.obj func12-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports func-12-123.dll | FileCheck --check-prefixes=IMPORTS-12-123,IMPORTS-12-123-EC %s +RUN: llvm-readobj --coff-imports func-12-123.dll | FileCheck --check-prefix=IMPORTS-12-123 %s IMPORTS-12-123: Import { IMPORTS-12-123-NEXT: Name: test.dll IMPORTS-12-123-NEXT: ImportLookupTableRVA: 0x3338 @@ -204,14 +190,14 @@ IMPORTS-12-123-NEXT: Symbol: func1 (0) IMPORTS-12-123-NEXT: Symbol: func2 (0) IMPORTS-12-123-NEXT: } IMPORTS-12-123-NEXT: HybridObject { -IMPORTS-12-123-EC: Import { -IMPORTS-12-123-EC-NEXT: Name: test.dll -IMPORTS-12-123-EC-NEXT: ImportLookupTableRVA: 0x3338 -IMPORTS-12-123-EC-NEXT: ImportAddressTableRVA: 0x2000 -IMPORTS-12-123-EC-NEXT: Symbol: func1 (0) -IMPORTS-12-123-EC-NEXT: Symbol: func2 (0) -IMPORTS-12-123-EC-NEXT: Symbol: func3 (0) -IMPORTS-12-123-EC-NEXT:} +IMPORTS-12-123: Import { +IMPORTS-12-123-NEXT: Name: test.dll +IMPORTS-12-123-NEXT: ImportLookupTableRVA: 0x3338 +IMPORTS-12-123-NEXT: ImportAddressTableRVA: 0x2000 +IMPORTS-12-123-NEXT: Symbol: func1 (0) +IMPORTS-12-123-NEXT: Symbol: func2 (0) +IMPORTS-12-123-NEXT: Symbol: func3 (0) +IMPORTS-12-123-NEXT: } IMPORTS-12-123-NEXT: } RUN: llvm-readobj --hex-dump=.test func-12-123.dll | FileCheck --check-prefix=TEST-12-123 %s @@ -225,12 +211,6 @@ RUN: llvm-readobj --hex-dump=.rdata func-12-123.dll | FileCheck --check-prefix=T TEST-12-123AUX: 0x180004000 08100080 01000000 1c100080 01000000 TEST-12-123AUX-NEXT: 0x180004010 30100080 01000000 00000000 00000000 -RUN: lld-link -machine:arm64ec -dll -noentry -out:func-12-123-ec.dll loadconfig-arm64.obj 
loadconfig-arm64ec.obj \ -RUN: icall.obj func123-arm64ec.obj func12-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports func-12-123-ec.dll | FileCheck --check-prefix=IMPORTS-12-123-EC %s -RUN: llvm-readobj --hex-dump=.test func-12-123-ec.dll | FileCheck --check-prefix=TEST-12-123 %s -RUN: llvm-readobj --hex-dump=.testa func-12-123-ec.dll | FileCheck --check-prefix=TEST-12-123A %s -RUN: llvm-readobj --hex-dump=.rdata func-12-123-ec.dll | FileCheck --check-prefix=TEST-12-123AUX %s # Link to the imported func2 and func3 from both native and EC code, func4 from native code, # and func1 from EC code. @@ -238,7 +218,7 @@ RUN: llvm-readobj --hex-dump=.rdata func-12-123-ec.dll | FileCheck --check-prefi RUN: lld-link -machine:arm64x -dll -noentry -out:test-234-123.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ RUN: icall.obj func123-arm64ec.obj func234-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports test-234-123.dll | FileCheck --check-prefixes=IMPORTS-234-123,IMPORTS-234-123-EC %s +RUN: llvm-readobj --coff-imports test-234-123.dll | FileCheck --check-prefix=IMPORTS-234-123 %s IMPORTS-234-123: Import { IMPORTS-234-123-NEXT: Name: test.dll IMPORTS-234-123-NEXT: ImportLookupTableRVA: 0x3338 @@ -248,14 +228,14 @@ IMPORTS-234-123-NEXT: Symbol: func2 (0) IMPORTS-234-123-NEXT: Symbol: func3 (0) IMPORTS-234-123-NEXT: } IMPORTS-234-123-NEXT: HybridObject { -IMPORTS-234-123-EC: Import { -IMPORTS-234-123-EC-NEXT: Name: test.dll -IMPORTS-234-123-EC-NEXT: ImportLookupTableRVA: 0x3340 -IMPORTS-234-123-EC-NEXT: ImportAddressTableRVA: 0x2008 -IMPORTS-234-123-EC-NEXT: Symbol: func2 (0) -IMPORTS-234-123-EC-NEXT: Symbol: func3 (0) -IMPORTS-234-123-EC-NEXT: Symbol: func1 (0) -IMPORTS-234-123-EC-NEXT:} +IMPORTS-234-123: Import { +IMPORTS-234-123-NEXT: Name: test.dll +IMPORTS-234-123-NEXT: ImportLookupTableRVA: 0x3340 +IMPORTS-234-123-NEXT: ImportAddressTableRVA: 0x2008 +IMPORTS-234-123-NEXT: Symbol: func2 (0) +IMPORTS-234-123-NEXT: Symbol: func3 (0) +IMPORTS-234-123-NEXT: 
Symbol: func1 (0) +IMPORTS-234-123-NEXT: } IMPORTS-234-123-NEXT: } RUN: llvm-readobj --hex-dump=.test test-234-123.dll | FileCheck --check-prefix=TEST-234-123 %s @@ -265,19 +245,13 @@ TEST-234-123-NEXT: 0x180007010 10400000 1020000 RUN: llvm-readobj --hex-dump=.testa test-234-123.dll | FileCheck --check-prefix=TEST-234-123A %s TEST-234-123A: 0x180008000 08200000 10200000 00200000 -RUN: lld-link -machine:arm64ec -dll -noentry -out:test-234-123-ec.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: icall.obj func123-arm64ec.obj func234-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports test-234-123-ec.dll | FileCheck --check-prefix=IMPORTS-234-123-EC %s -RUN: llvm-readobj --hex-dump=.test test-234-123-ec.dll | FileCheck --check-prefix=TEST-234-123 %s -RUN: llvm-readobj --hex-dump=.testa test-234-123-ec.dll | FileCheck --check-prefix=TEST-234-123A %s - # Link to the imported func3 and func4 from native code, and func1 and func2 from EC code. RUN: lld-link -machine:arm64x -dll -noentry -out:test-34-12.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ RUN: icall.obj func12o-arm64ec.obj func34o-arm64.obj imp-arm64x.lib imp2.lib -RUN: llvm-readobj --coff-imports test-34-12.dll | FileCheck --check-prefixes=IMPORTS-34-12,IMPORTS-34-12-EC %s +RUN: llvm-readobj --coff-imports test-34-12.dll | FileCheck --check-prefix=IMPORTS-34-12 %s IMPORTS-34-12: Import { IMPORTS-34-12-NEXT: Name: test.dll IMPORTS-34-12-NEXT: ImportLookupTableRVA: 0x3350 @@ -292,19 +266,19 @@ IMPORTS-34-12-NEXT: ImportAddressTableRVA: 0x2028 IMPORTS-34-12-NEXT: Symbol: otherfunc (0) IMPORTS-34-12-NEXT: } IMPORTS-34-12-NEXT: HybridObject { -IMPORTS-34-12-EC: Import { -IMPORTS-34-12-EC-NEXT: Name: test.dll -IMPORTS-34-12-EC-NEXT: ImportLookupTableRVA: 0x3360 -IMPORTS-34-12-EC-NEXT: ImportAddressTableRVA: 0x2010 -IMPORTS-34-12-EC-NEXT: Symbol: func1 (0) -IMPORTS-34-12-EC-NEXT: Symbol: func2 (0) -IMPORTS-34-12-EC-NEXT:} -IMPORTS-34-12-EC-NEXT:Import { -IMPORTS-34-12-EC-NEXT: Name: test2.dll 
-IMPORTS-34-12-EC-NEXT: ImportLookupTableRVA: 0x3378 -IMPORTS-34-12-EC-NEXT: ImportAddressTableRVA: 0x2028 -IMPORTS-34-12-EC-NEXT: Symbol: otherfunc (0) -IMPORTS-34-12-EC-NEXT:} +IMPORTS-34-12: Import { +IMPORTS-34-12-NEXT: Name: test.dll +IMPORTS-34-12-NEXT: ImportLookupTableRVA: 0x3360 +IMPORTS-34-12-NEXT: ImportAddressTableRVA: 0x2010 +IMPORTS-34-12-NEXT: Symbol: func1 (0) +IMPORTS-34-12-NEXT: Symbol: func2 (0) +IMPORTS-34-12-NEXT: } +IMPORTS-34-12-NEXT: Import { +IMPORTS-34-12-NEXT: Name: test2.dll +IMPORTS-34-12-NEXT: ImportLookupTableRVA: 0x3378 +IMPORTS-34-12-NEXT: ImportAddressTableRVA: 0x2028 +IMPORTS-34-12-NEXT: Symbol: otherfunc (0) +IMPORTS-34-12-NEXT: } IMPORTS-34-12-NEXT: } RUN: llvm-readobj --hex-dump=.test test-34-12.dll | FileCheck --check-prefix=TEST-23-12 %s @@ -314,12 +288,6 @@ TEST-23-12-NEXT: 0x180007010 28400000 28200000 RUN: llvm-readobj --hex-dump=.testa test-34-12.dll | FileCheck --check-prefix=TEST-23-12A %s TEST-23-12A: 0x180008000 00200000 08200000 28200000 -RUN: lld-link -machine:arm64ec -dll -noentry -out:test-34-12-ec.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: icall.obj func12o-arm64ec.obj func34o-arm64.obj imp-arm64x.lib imp2.lib -RUN: llvm-readobj --coff-imports test-34-12-ec.dll | FileCheck --check-prefix=IMPORTS-34-12-EC %s -RUN: llvm-readobj --hex-dump=.test test-34-12-ec.dll | FileCheck --check-prefix=TEST-23-12 %s -RUN: llvm-readobj --hex-dump=.testa test-34-12-ec.dll | FileCheck --check-prefix=TEST-23-12A %s - # Link only to imported EC functions, with no native imports. 
@@ -367,7 +335,7 @@ IMPORTS-EC12-NEXT: } RUN: lld-link -machine:arm64x -dll -noentry -out:test-n12.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ RUN: func12-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports test-n12.dll | FileCheck --check-prefixes=IMPORTS-N12,IMPORTS-N12-EC %s +RUN: llvm-readobj --coff-imports test-n12.dll | FileCheck --check-prefix=IMPORTS-N12 %s IMPORTS-N12: Arch: aarch64 IMPORTS-N12-NEXT: AddressSize: 64bit @@ -379,20 +347,16 @@ IMPORTS-N12-NEXT: Symbol: func1 (0) IMPORTS-N12-NEXT: Symbol: func2 (0) IMPORTS-N12-NEXT: } IMPORTS-N12-NEXT: HybridObject { -IMPORTS-N12-EC: Format: COFF-ARM64EC -IMPORTS-N12-EC-NEXT: Arch: aarch64 -IMPORTS-N12-EC-NEXT: AddressSize: 64bit -IMPORTS-N12-EC-NEXT: Import { -IMPORTS-N12-EC-NEXT: Name: test.dll -IMPORTS-N12-EC-NEXT: ImportLookupTableRVA: 0x2340 -IMPORTS-N12-EC-NEXT: ImportAddressTableRVA: 0x1010 -IMPORTS-N12-EC-NEXT: } +IMPORTS-N12-NEXT: Format: COFF-ARM64EC +IMPORTS-N12-NEXT: Arch: aarch64 +IMPORTS-N12-NEXT: AddressSize: 64bit +IMPORTS-N12-NEXT: Import { +IMPORTS-N12-NEXT: Name: test.dll +IMPORTS-N12-NEXT: ImportLookupTableRVA: 0x2340 +IMPORTS-N12-NEXT: ImportAddressTableRVA: 0x1010 +IMPORTS-N12-NEXT: } IMPORTS-N12-NEXT: } -RUN: lld-link -machine:arm64ec -dll -noentry -out:test-n12-ec.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: func12-arm64.obj imp-arm64x.lib -RUN: llvm-readobj --coff-imports test-n12-ec.dll | FileCheck --check-prefix=IMPORTS-N12-EC %s - RUN: lld-link -machine:arm64x -dll -noentry -out:test-dup.dll loadconfig-arm64.obj loadconfig-arm64ec.obj icall.obj \ RUN: func12-arm64ec.obj func34-arm64.obj dup.lib diff --git a/lld/test/COFF/arm64x-sameaddress.test b/lld/test/COFF/arm64x-sameaddress.test deleted file mode 100644 index c69be9d268c3b..0000000000000 --- a/lld/test/COFF/arm64x-sameaddress.test +++ /dev/null @@ -1,56 +0,0 @@ -REQUIRES: aarch64 -RUN: split-file %s %t.dir && cd %t.dir - -RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func-arm64ec.s -o 
func-arm64ec.obj -RUN: llvm-mc -filetype=obj -triple=aarch64-windows func-arm64.s -o func-arm64.obj -RUN: llvm-mc -filetype=obj -triple=arm64ec-windows drectve.s -o drectve.obj -RUN: llvm-mc -filetype=obj -triple=aarch64-windows drectve.s -o drectve-arm64.obj -RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj -RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64.s -o loadconfig-arm64.obj - -RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: func-arm64.obj func-arm64ec.obj drectve.obj - -RUN: lld-link -machine:arm64x -dll -noentry -out:out-cmd.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: func-arm64.obj func-arm64ec.obj -arm64xsameaddress:func - -RUN: lld-link -machine:arm64ec -dll -noentry -out:out-ec.dll loadconfig-arm64ec.obj func-arm64ec.obj drectve.obj - -RUN: lld-link -machine:arm64x -dll -noentry -out:out-warn.dll loadconfig-arm64.obj loadconfig-arm64ec.obj \ -RUN: func-arm64.obj func-arm64ec.obj drectve-arm64.obj 2>&1 | FileCheck --check-prefix=WARN %s -WARN: lld-link: warning: -arm64xsameaddress: is not allowed in non-ARM64EC files (drectve-arm64.obj) - -#--- func-arm64.s - .section .text,"xr",discard,func - .globl func -func: - mov x0, #1 - ret - -#--- func-arm64ec.s - .section .text,"xr",discard,"#func" - .globl "#func" -"#func": - mov x0, #2 - ret - - .weak_anti_dep func - .set func,"#func" - - .section .wowthk,"xr",discard,entry_thunk - .globl entry_thunk -entry_thunk: - mov x0, #3 - ret - - .section .test,"dr" - .rva func - - .section .hybmp$x,"yi" - .symidx "#func" - .symidx entry_thunk - .word 1 - -#--- drectve.s - .section .drectve, "yn" - .ascii " -arm64xsameaddress:func" diff --git a/lld/test/COFF/arm64x-symtab.s b/lld/test/COFF/arm64x-symtab.s index 176e81a23be16..c634f8a6ed4c5 100644 --- a/lld/test/COFF/arm64x-symtab.s +++ b/lld/test/COFF/arm64x-symtab.s @@ -18,8 +18,6 @@ // RUN: not lld-link 
-machine:arm64x -dll -noentry -out:err1.dll symref-aarch64.obj sym-arm64ec.obj \ // RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s -// RUN: not lld-link -machine:arm64ec -dll -noentry -out:err1.dll symref-aarch64.obj sym-arm64ec.obj \ -// RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s // UNDEF: lld-link: error: undefined symbol: sym (native symbol) // UNDEF-NEXT: >>> referenced by symref-aarch64.obj:(.data) @@ -27,34 +25,25 @@ // RUN: not lld-link -machine:arm64x -dll -noentry -out:out.dll symref-arm64ec.obj sym-aarch64.obj \ // RUN: 2>&1 | FileCheck --check-prefix=UNDEFEC %s -// RUN: not lld-link -machine:arm64ec -dll -noentry -out:out.dll symref-arm64ec.obj sym-aarch64.obj \ -// RUN: 2>&1 | FileCheck --check-prefix=UNDEFEC %s // UNDEFEC: lld-link: error: undefined symbol: sym (EC symbol) // UNDEFEC-NEXT: >>> referenced by symref-arm64ec.obj:(.data) // RUN: not lld-link -machine:arm64x -dll -noentry -out:out.dll symref-x86_64.obj sym-aarch64.obj \ // RUN: 2>&1 | FileCheck --check-prefix=UNDEFX86 %s -// RUN: not lld-link -machine:arm64ec -dll -noentry -out:out.dll symref-x86_64.obj sym-aarch64.obj \ -// RUN: 2>&1 | FileCheck --check-prefix=UNDEFX86 %s // UNDEFX86: lld-link: error: undefined symbol: sym (EC symbol) // UNDEFX86-NEXT: >>> referenced by symref-x86_64.obj:(.data) // RUN: not lld-link -machine:arm64x -dll -noentry -out:err2.dll symref-aarch64.obj sym-x86_64.obj \ // RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s -// RUN: not lld-link -machine:arm64ec -dll -noentry -out:err2.dll symref-aarch64.obj sym-x86_64.obj \ -// RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s // Check that ARM64X target can have the same symbol names in both native and EC namespaces. 
// RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll symref-aarch64.obj sym-aarch64.obj \ // RUN: symref-arm64ec.obj sym-x86_64.obj -// RUN: lld-link -machine:arm64ec -dll -noentry -out:out.dll symref-aarch64.obj sym-aarch64.obj \ -// RUN: symref-arm64ec.obj sym-x86_64.obj // Check that ARM64X target can reference both native and EC symbols from an archive. // RUN: lld-link -machine:arm64x -dll -noentry -out:out2.dll symref-aarch64.obj symref-arm64ec.obj sym.lib -// RUN: lld-link -machine:arm64ec -dll -noentry -out:out2.dll symref-aarch64.obj symref-arm64ec.obj sym.lib // Check that EC object files can reference x86_64 library symbols. @@ -66,20 +55,15 @@ // RUN: not lld-link -machine:arm64x -dll -noentry -out:err3.dll symref-aarch64.obj sym-x86_64.lib \ // RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s -// RUN: not lld-link -machine:arm64ec -dll -noentry -out:err3.dll symref-aarch64.obj sym-x86_64.lib \ -// RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s // Check that native object files can reference native library symbols. // RUN: lld-link -machine:arm64x -dll -noentry -out:out6.dll symref-aarch64.obj sym-aarch64.lib -// RUN: lld-link -machine:arm64ec -dll -noentry -out:out6.dll symref-aarch64.obj sym-aarch64.lib // Check that EC object files can't reference native ARM64 library symbols. 
// RUN: not lld-link -machine:arm64x -dll -noentry -out:err4.dll symref-arm64ec.obj sym-aarch64.lib \ // RUN: 2>&1 | FileCheck --check-prefix=UNDEFEC %s -// RUN: not lld-link -machine:arm64ec -dll -noentry -out:err4.dll symref-arm64ec.obj sym-aarch64.lib \ -// RUN: 2>&1 | FileCheck --check-prefix=UNDEFEC %s #--- symref.s .data diff --git a/lld/test/COFF/arm64x-wrap.s b/lld/test/COFF/arm64x-wrap.s index 5530bc47c884e..4f600e38f7a83 100644 --- a/lld/test/COFF/arm64x-wrap.s +++ b/lld/test/COFF/arm64x-wrap.s @@ -15,10 +15,6 @@ // CHECK: 0x180004000 02000000 02000000 01000000 02000000 // CHECK: 0x180004010 02000000 01000000 -// RUN: lld-link -machine:arm64ec -dll -noentry test-arm64.obj test-arm64ec.obj other-arm64.obj other-arm64ec.obj \ -// RUN: loadconfig-arm64.obj loadconfig-arm64ec.obj -out:out-ec.dll -wrap:sym -wrap:nosuchsym -// RUN: llvm-readobj --hex-dump=.test out-ec.dll | FileCheck %s - #--- test.s .section .test,"dr" .word sym diff --git a/lld/test/COFF/autoimport-arm64ec-data.test b/lld/test/COFF/autoimport-arm64ec-data.test index 52e64d6020a83..1f22ca4917557 100644 --- a/lld/test/COFF/autoimport-arm64ec-data.test +++ b/lld/test/COFF/autoimport-arm64ec-data.test @@ -12,7 +12,7 @@ RUN: llvm-objdump -s out.dll | FileCheck --check-prefix=CONTENTS %s IMPORTS: Import { IMPORTS-NEXT: Name: test.dll -IMPORTS-NEXT: ImportLookupTableRVA: 0x4100 +IMPORTS-NEXT: ImportLookupTableRVA: 0x40E0 IMPORTS-NEXT: ImportAddressTableRVA: 0x3000 IMPORTS-NEXT: Symbol: variable (0) IMPORTS-NEXT: } diff --git a/lldb/include/lldb/Core/Address.h b/lldb/include/lldb/Core/Address.h index 85b2ab7bb3cfe..9b5874f8b1fbe 100644 --- a/lldb/include/lldb/Core/Address.h +++ b/lldb/include/lldb/Core/Address.h @@ -371,15 +371,22 @@ class Address { bool ResolveAddressUsingFileSections(lldb::addr_t addr, const SectionList *sections); - /// Resolve this address to its containing function. + /// Resolve this address to its containing function and optionally get + /// that function's address range. 
/// /// \param[out] sym_ctx /// The symbol context describing the function in which this address lies /// + /// \parm[out] addr_range_ptr + /// Pointer to the AddressRange to fill in with the function's address + /// range. Caller may pass null if they don't need the address range. + /// /// \return - /// Returns \b false if the function/symbol could not be resolved; + /// Returns \b false if the function/symbol could not be resolved + /// or if the address range was requested and could not be resolved; /// returns \b true otherwise. - bool ResolveFunctionScope(lldb_private::SymbolContext &sym_ctx); + bool ResolveFunctionScope(lldb_private::SymbolContext &sym_ctx, + lldb_private::AddressRange *addr_range_ptr = nullptr); /// Set the address to represent \a load_addr. /// diff --git a/lldb/source/Core/Address.cpp b/lldb/source/Core/Address.cpp index a967bf5491211..1dab874a96583 100644 --- a/lldb/source/Core/Address.cpp +++ b/lldb/source/Core/Address.cpp @@ -263,11 +263,22 @@ bool Address::ResolveAddressUsingFileSections(addr_t file_addr, return false; // Failed to resolve this address to a section offset value } -bool Address::ResolveFunctionScope(SymbolContext &sym_ctx) { +/// if "addr_range_ptr" is not NULL, then fill in with the address range of the function. 
+bool Address::ResolveFunctionScope(SymbolContext &sym_ctx, + AddressRange *addr_range_ptr) { constexpr SymbolContextItem resolve_scope = eSymbolContextFunction | eSymbolContextSymbol; - return CalculateSymbolContext(&sym_ctx, resolve_scope) & resolve_scope; + if (!(CalculateSymbolContext(&sym_ctx, resolve_scope) & resolve_scope)) { + if (addr_range_ptr) + addr_range_ptr->Clear(); + return false; + } + + if (!addr_range_ptr) + return true; + + return sym_ctx.GetAddressRange(resolve_scope, 0, false, *addr_range_ptr); } ModuleSP Address::GetModule() const { diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp index 578ab12268ea3..1270d57423c7b 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp @@ -872,7 +872,6 @@ void DynamicLoaderDarwin::PrivateInitialize(Process *process) { StateAsCString(m_process->GetState())); Clear(true); m_process = process; - m_process->GetTarget().ClearAllLoadedSections(); } // Member function that gets called when the process state changes. 
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp index 451cf40e2818d..a2722db5d24a0 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp @@ -123,7 +123,8 @@ void ASTStructExtractor::ExtractFromTopLevelDecl(Decl *D) { FunctionDecl *function_decl = dyn_cast(D); if (m_ast_context && function_decl && - m_function.m_wrapper_function_name == function_decl->getNameAsString()) { + !m_function.m_wrapper_function_name.compare( + function_decl->getNameAsString())) { ExtractFromFunctionDecl(function_decl); } } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index e3a866e2b6d48..a3e809f44ed23 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2479,8 +2479,8 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF( std::unique_ptr decl_up; if (decl_file || decl_line || decl_column) decl_up = std::make_unique( - die.GetCU()->GetFile(decl_file.value_or(0)), decl_line.value_or(0), - decl_column.value_or(0)); + die.GetCU()->GetFile(decl_file ? *decl_file : 0), + decl_line ? *decl_line : 0, decl_column ? 
*decl_column : 0); SymbolFileDWARF *dwarf = die.GetDWARF(); // Supply the type _only_ if it has already been parsed diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 0fc7f79be70ec..907d63eb51afe 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1358,15 +1358,15 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(CompileUnit &comp_unit, if (decl_file || decl_line || decl_column) decl_up = std::make_unique( comp_unit.GetSupportFiles().GetFileSpecAtIndex( - decl_file.value_or(0)), - decl_line.value_or(0), decl_column.value_or(0)); + decl_file ? *decl_file : 0), + decl_line ? *decl_line : 0, decl_column ? *decl_column : 0); std::unique_ptr call_up; if (call_file || call_line || call_column) call_up = std::make_unique( comp_unit.GetSupportFiles().GetFileSpecAtIndex( - call_file.value_or(0)), - call_line.value_or(0), call_column.value_or(0)); + call_file ? *call_file : 0), + call_line ? *call_line : 0, call_column ? 
*call_column : 0); block->SetInlinedFunctionInfo(name, mangled_name, decl_up.get(), call_up.get()); diff --git a/lldb/source/Target/DynamicRegisterInfo.cpp b/lldb/source/Target/DynamicRegisterInfo.cpp index b964dc5877a97..9ad98a41c688c 100644 --- a/lldb/source/Target/DynamicRegisterInfo.cpp +++ b/lldb/source/Target/DynamicRegisterInfo.cpp @@ -497,7 +497,10 @@ void DynamicRegisterInfo::Finalize(const ArchSpec &arch) { pos != end; ++pos) { if (pos->second.size() > 1) { llvm::sort(pos->second); - pos->second.erase(llvm::unique(pos->second), pos->second.end()); + reg_num_collection::iterator unique_end = + std::unique(pos->second.begin(), pos->second.end()); + if (unique_end != pos->second.end()) + pos->second.erase(unique_end, pos->second.end()); } assert(!pos->second.empty()); if (pos->second.back() != LLDB_INVALID_REGNUM) diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 13ff12b4ff953..f136271a3b8a8 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -2675,6 +2675,7 @@ Status Process::LaunchPrivate(ProcessLaunchInfo &launch_info, StateType &state, m_jit_loaders_up.reset(); m_system_runtime_up.reset(); m_os_up.reset(); + GetTarget().ClearAllLoadedSections(); { std::lock_guard guard(m_process_input_reader_mutex); @@ -2799,6 +2800,7 @@ Status Process::LaunchPrivate(ProcessLaunchInfo &launch_info, StateType &state, } Status Process::LoadCore() { + GetTarget().ClearAllLoadedSections(); Status error = DoLoadCore(); if (error.Success()) { ListenerSP listener_sp( @@ -3094,6 +3096,8 @@ void Process::CompleteAttach() { Log *log(GetLog(LLDBLog::Process | LLDBLog::Target)); LLDB_LOGF(log, "Process::%s()", __FUNCTION__); + GetTarget().ClearAllLoadedSections(); + // Let the process subclass figure out at much as it can about the process // before we go looking for a dynamic loader plug-in. 
ArchSpec process_arch; diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp index 2958923a98010..cf4b96c6eda9f 100644 --- a/lldb/source/Target/RegisterContextUnwind.cpp +++ b/lldb/source/Target/RegisterContextUnwind.cpp @@ -160,7 +160,8 @@ void RegisterContextUnwind::InitializeZerothFrame() { UnwindLogMsg("using architectural default unwind method"); } - m_sym_ctx_valid = m_current_pc.ResolveFunctionScope(m_sym_ctx); + AddressRange addr_range; + m_sym_ctx_valid = m_current_pc.ResolveFunctionScope(m_sym_ctx, &addr_range); if (m_sym_ctx.symbol) { UnwindLogMsg("with pc value of 0x%" PRIx64 ", symbol name is '%s'", @@ -184,9 +185,15 @@ void RegisterContextUnwind::InitializeZerothFrame() { // If we were able to find a symbol/function, set addr_range to the bounds of // that symbol/function. else treat the current pc value as the start_pc and // record no offset. - if (m_sym_ctx_valid) { - m_start_pc = m_sym_ctx.GetFunctionOrSymbolAddress(); - if (m_current_pc.GetModule() == m_start_pc.GetModule()) { + if (addr_range.GetBaseAddress().IsValid()) { + m_start_pc = addr_range.GetBaseAddress(); + if (m_current_pc.GetSection() == m_start_pc.GetSection()) { + m_current_offset = m_current_pc.GetOffset() - m_start_pc.GetOffset(); + } else if (m_current_pc.GetModule() == m_start_pc.GetModule()) { + // This means that whatever symbol we kicked up isn't really correct --- + // we should not cross section boundaries ... We really should NULL out + // the function/symbol in this case unless there is a bad assumption here + // due to inlined functions? 
m_current_offset = m_current_pc.GetFileAddress() - m_start_pc.GetFileAddress(); } @@ -492,7 +499,8 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { return; } - m_sym_ctx_valid = m_current_pc.ResolveFunctionScope(m_sym_ctx); + AddressRange addr_range; + m_sym_ctx_valid = m_current_pc.ResolveFunctionScope(m_sym_ctx, &addr_range); if (m_sym_ctx.symbol) { UnwindLogMsg("with pc value of 0x%" PRIx64 ", symbol name is '%s'", pc, @@ -516,8 +524,9 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { // Don't decrement if we're "above" an asynchronous event like // sigtramp. decr_pc_and_recompute_addr_range = false; - } else if (Address addr = m_sym_ctx.GetFunctionOrSymbolAddress(); - addr != m_current_pc) { + } else if (!addr_range.GetBaseAddress().IsValid() || + addr_range.GetBaseAddress().GetSection() != m_current_pc.GetSection() || + addr_range.GetBaseAddress().GetOffset() != m_current_pc.GetOffset()) { // If our "current" pc isn't the start of a function, decrement the pc // if we're up the stack. if (m_behaves_like_zeroth_frame) @@ -550,7 +559,7 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { Address temporary_pc; temporary_pc.SetLoadAddress(pc - 1, &process->GetTarget()); m_sym_ctx.Clear(false); - m_sym_ctx_valid = temporary_pc.ResolveFunctionScope(m_sym_ctx); + m_sym_ctx_valid = temporary_pc.ResolveFunctionScope(m_sym_ctx, &addr_range); UnwindLogMsg("Symbol is now %s", GetSymbolOrFunctionName(m_sym_ctx).AsCString("")); @@ -559,8 +568,8 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { // If we were able to find a symbol/function, set addr_range_ptr to the // bounds of that symbol/function. else treat the current pc value as the // start_pc and record no offset. 
- if (m_sym_ctx_valid) { - m_start_pc = m_sym_ctx.GetFunctionOrSymbolAddress(); + if (addr_range.GetBaseAddress().IsValid()) { + m_start_pc = addr_range.GetBaseAddress(); m_current_offset = pc - m_start_pc.GetLoadAddress(&process->GetTarget()); m_current_offset_backed_up_one = m_current_offset; if (decr_pc_and_recompute_addr_range && @@ -1943,7 +1952,8 @@ void RegisterContextUnwind::PropagateTrapHandlerFlagFromUnwindPlan( GetSymbolOrFunctionName(m_sym_ctx).AsCString("")); m_current_offset_backed_up_one = m_current_offset; - m_sym_ctx_valid = m_current_pc.ResolveFunctionScope(m_sym_ctx); + AddressRange addr_range; + m_sym_ctx_valid = m_current_pc.ResolveFunctionScope(m_sym_ctx, &addr_range); UnwindLogMsg("Symbol is now %s", GetSymbolOrFunctionName(m_sym_ctx).AsCString("")); @@ -1952,11 +1962,9 @@ void RegisterContextUnwind::PropagateTrapHandlerFlagFromUnwindPlan( Process *process = exe_ctx.GetProcessPtr(); Target *target = &process->GetTarget(); - if (m_sym_ctx_valid) { - m_start_pc = m_sym_ctx.GetFunctionOrSymbolAddress(); - m_current_offset = m_current_pc.GetLoadAddress(target) - - m_start_pc.GetLoadAddress(target); - } + m_start_pc = addr_range.GetBaseAddress(); + m_current_offset = + m_current_pc.GetLoadAddress(target) - m_start_pc.GetLoadAddress(target); } } diff --git a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py index cd95a9ff3fe8c..faa35421ff60b 100644 --- a/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py +++ b/lldb/test/API/lang/cpp/dynamic-value/TestDynamicValue.py @@ -282,7 +282,6 @@ def test_from_forward_decl(self): @no_debug_info_test @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24663") - @expectedFailureDarwin # dynamic loader unloads modules @expectedFailureAll(archs=["arm"]) # Minidump saving not implemented def test_from_core_file(self): """Test fetching C++ dynamic values from core files. 
Specifically, test diff --git a/lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s b/lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s index a7b5431a7afaf..ede04c88a030f 100644 --- a/lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s +++ b/lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s @@ -19,16 +19,19 @@ baz: .Lbaz_end: .size baz, .Lbaz_end-baz -foo.__part.3: + .type foo,@function +foo: .cfi_startproc - .cfi_def_cfa_offset 32 + pushq %rbx + .cfi_def_cfa_offset 16 .cfi_offset %rbx, -16 - addq $24, %rsp - .cfi_def_cfa %rsp, 8 - retq -.Lfoo.__part.3_end: - .size foo.__part.3, .Lfoo.__part.3_end-foo.__part.3 + movl %edi, %ebx + cmpl $0, %ebx + je foo.__part.2 + jmp foo.__part.1 .cfi_endproc +.Lfoo_end: + .size foo, .Lfoo_end-foo # NB: Deliberately inserting padding to separate the two parts of the function # as we're currently only parsing a single FDE entry from a (coalesced) address @@ -37,13 +40,11 @@ foo.__part.3: foo.__part.1: .cfi_startproc - .cfi_def_cfa_offset 32 + .cfi_def_cfa_offset 16 .cfi_offset %rbx, -16 subq $16, %rsp - .cfi_def_cfa_offset 48 - callq bar - addq $16, %rsp .cfi_def_cfa_offset 32 + callq bar jmp foo.__part.3 .Lfoo.__part.1_end: .size foo.__part.1, .Lfoo.__part.1_end-foo.__part.1 @@ -51,50 +52,46 @@ foo.__part.1: bar: .cfi_startproc - subq $88, %rsp - .cfi_def_cfa_offset 96 + subq $24, %rsp + .cfi_def_cfa_offset 32 xorl %edi, %edi callq foo - addq $88, %rsp + addq $24, %rsp .cfi_def_cfa %rsp, 8 retq .cfi_endproc .Lbar_end: .size bar, .Lbar_end-bar - .type foo,@function -foo: +foo.__part.2: .cfi_startproc - pushq %rbx .cfi_def_cfa_offset 16 .cfi_offset %rbx, -16 - movl %edi, %ebx - cmpl $0, %ebx - je foo.__part.2 subq $16, %rsp .cfi_def_cfa_offset 32 - jmp foo.__part.1 + callq baz + jmp foo.__part.3 +.Lfoo.__part.2_end: + .size foo.__part.2, .Lfoo.__part.2_end-foo.__part.2 .cfi_endproc -.Lfoo_end: - .size foo, .Lfoo_end-foo # NB: Deliberately inserting padding to separate the two parts 
of the function # as we're currently only parsing a single FDE entry from a (coalesced) address # range. nop -foo.__part.2: +foo.__part.3: .cfi_startproc - .cfi_def_cfa_offset 16 - .cfi_offset %rbx, -16 - subq $16, %rsp .cfi_def_cfa_offset 32 - callq baz - jmp foo.__part.3 -.Lfoo.__part.2_end: - .size foo.__part.2, .Lfoo.__part.2_end-foo.__part.2 + .cfi_offset %rbx, -16 + addq $24, %rsp + .cfi_def_cfa %rsp, 8 + retq +.Lfoo.__part.3_end: + .size foo.__part.3, .Lfoo.__part.3_end-foo.__part.3 .cfi_endproc + .globl main .type main,@function main: diff --git a/lldb/test/Shell/Unwind/basic-block-sections-with-dwarf-static.test b/lldb/test/Shell/Unwind/basic-block-sections-with-dwarf-static.test index b83e388e79d21..a4ed73e14de01 100644 --- a/lldb/test/Shell/Unwind/basic-block-sections-with-dwarf-static.test +++ b/lldb/test/Shell/Unwind/basic-block-sections-with-dwarf-static.test @@ -22,17 +22,15 @@ image show-unwind --cached true -n foo # CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes. # CHECK-NEXT: This UnwindPlan is valid at all instruction locations: no. # CHECK-NEXT: This UnwindPlan is for a trap handler function: no. 
-# CHECK-NEXT: Address range of this UnwindPlan: [{{.*}}.text + 6-0x000000000000000b)[{{.*}}.text + 12-0x000000000000001b)[{{.*}}.text + 43-0x0000000000000039)[{{.*}}.text + 58-0x0000000000000045) -# CHECK-NEXT: row[0]: -37: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[1]: -33: CFA=rsp +8 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[2]: -31: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[3]: -27: CFA=rsp+48 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[4]: -18: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[5]: 0: CFA=rsp +8 => rip=[CFA-8] -# CHECK-NEXT: row[6]: 1: CFA=rsp+16 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[7]: 12: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[8]: 15: CFA=rsp+16 => rbx=[CFA-16] rip=[CFA-8] -# CHECK-NEXT: row[9]: 19: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] +# CHECK-NEXT: Address range of this UnwindPlan: [{{.*}}.text + 6-0x0000000000000010)[{{.*}}.text + 17-0x000000000000001c)[{{.*}}.text + 44-0x0000000000000037)[{{.*}}.text + 56-0x000000000000003d) +# CHECK-NEXT: row[0]: 0: CFA=rsp +8 => rip=[CFA-8] +# CHECK-NEXT: row[1]: 1: CFA=rsp+16 => rbx=[CFA-16] rip=[CFA-8] +# CHECK-NEXT: row[2]: 11: CFA=rsp+16 => rbx=[CFA-16] rip=[CFA-8] +# CHECK-NEXT: row[3]: 15: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] +# CHECK-NEXT: row[4]: 38: CFA=rsp+16 => rbx=[CFA-16] rip=[CFA-8] +# CHECK-NEXT: row[5]: 42: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] +# CHECK-NEXT: row[6]: 50: CFA=rsp+32 => rbx=[CFA-16] rip=[CFA-8] +# CHECK-NEXT: row[7]: 54: CFA=rsp +8 => rbx=[CFA-16] rip=[CFA-8] # CHECK-EMPTY: image show-unwind --cached true -n bar @@ -43,8 +41,8 @@ image show-unwind --cached true -n bar # CHECK-NEXT: This UnwindPlan is sourced from the compiler: yes. # CHECK-NEXT: This UnwindPlan is valid at all instruction locations: no. # CHECK-NEXT: This UnwindPlan is for a trap handler function: no. 
-# CHECK-NEXT: Address range of this UnwindPlan: [{{.*}}.text + 27-0x000000000000002b) +# CHECK-NEXT: Address range of this UnwindPlan: [{{.*}}.text + 28-0x000000000000002c) # CHECK-NEXT: row[0]: 0: CFA=rsp +8 => rip=[CFA-8] -# CHECK-NEXT: row[1]: 4: CFA=rsp+96 => rip=[CFA-8] +# CHECK-NEXT: row[1]: 4: CFA=rsp+32 => rip=[CFA-8] # CHECK-NEXT: row[2]: 15: CFA=rsp +8 => rip=[CFA-8] # CHECK-EMPTY: diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index 8340b5ad8948d..f7ff76c3e8e84 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -154,21 +154,6 @@ endif() add_definitions(-DLLDB_USE_OS_LOG) -# Make sure we have the macOS SDK root as mig needs it and will silently -# fail to generate its output files without it. -if(CMAKE_OSX_SYSROOT) - set(MIG_SYSROOT ${CMAKE_OSX_SYSROOT}) -else() - execute_process(COMMAND xcrun --show-sdk-path - OUTPUT_VARIABLE MIG_SYSROOT - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - -if(NOT MIG_SYSROOT) - message(FATAL_ERROR "Unable to obtain sysroot required by mig (Mach Interface Generator). 
Set CMAKE_OSX_SYSROOT to explicitly specify a sysroot.") -endif() - if(${CMAKE_OSX_SYSROOT} MATCHES ".Internal.sdk$") message(STATUS "LLDB debugserver energy support is enabled") add_definitions(-DLLDB_ENERGY) @@ -192,7 +177,7 @@ endif() separate_arguments(MIG_ARCH_FLAGS_SEPARTED NATIVE_COMMAND "${MIG_ARCH_FLAGS}") add_custom_command(OUTPUT ${generated_mach_interfaces} - VERBATIM COMMAND mig ${MIG_ARCH_FLAGS_SEPARTED} -isysroot ${MIG_SYSROOT} ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs + VERBATIM COMMAND mig ${MIG_ARCH_FLAGS_SEPARTED} -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/MacOSX/dbgnub-mig.defs ) diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index 56a0c38b00037..51f9da854f4b6 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -559,6 +559,17 @@ lldb::SBFrame DAP::GetLLDBFrame(const llvm::json::Object &arguments) { return GetLLDBFrame(frame_id); } +llvm::json::Value DAP::CreateTopLevelScopes() { + llvm::json::Array scopes; + scopes.emplace_back( + CreateScope("Locals", VARREF_LOCALS, variables.locals.GetSize(), false)); + scopes.emplace_back(CreateScope("Globals", VARREF_GLOBALS, + variables.globals.GetSize(), false)); + scopes.emplace_back(CreateScope("Registers", VARREF_REGS, + variables.registers.GetSize(), false)); + return llvm::json::Value(std::move(scopes)); +} + ReplMode DAP::DetectReplMode(lldb::SBFrame frame, std::string &expression, bool partial_expression) { // Check for the escape hatch prefix. diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index c1a1130b1e59f..c2e4c2dea582e 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -226,8 +226,7 @@ struct DAP { /// \param[in] default_repl_mode /// Default repl mode behavior, as configured by the binary. /// \param[in] pre_init_commands - /// LLDB commands to execute as soon as the debugger instance is - /// allocated. 
+ /// LLDB commands to execute as soon as the debugger instance is allocated. /// \param[in] transport /// Transport for this debug session. DAP(Log *log, const ReplMode default_repl_mode, @@ -284,10 +283,10 @@ struct DAP { lldb::SBThread GetLLDBThread(const llvm::json::Object &arguments); lldb::SBFrame GetLLDBFrame(uint64_t frame_id); - /// TODO: remove this function when we finish migrating to the - /// new protocol types. lldb::SBFrame GetLLDBFrame(const llvm::json::Object &arguments); + llvm::json::Value CreateTopLevelScopes(); + void PopulateExceptionBreakpoints(); /// Attempt to determine if an expression is a variable expression or diff --git a/lldb/tools/lldb-dap/Handler/RequestHandler.h b/lldb/tools/lldb-dap/Handler/RequestHandler.h index eaebaf6619bbd..b0002440cf72e 100644 --- a/lldb/tools/lldb-dap/Handler/RequestHandler.h +++ b/lldb/tools/lldb-dap/Handler/RequestHandler.h @@ -452,15 +452,11 @@ class PauseRequestHandler : public LegacyRequestHandler { void operator()(const llvm::json::Object &request) const override; }; -class ScopesRequestHandler final - : public RequestHandler> { public: - using RequestHandler::RequestHandler; + using LegacyRequestHandler::LegacyRequestHandler; static llvm::StringLiteral GetCommand() { return "scopes"; } - - llvm::Expected - Run(const protocol::ScopesArguments &args) const override; + void operator()(const llvm::json::Object &request) const override; }; class SetVariableRequestHandler final diff --git a/lldb/tools/lldb-dap/Handler/ScopesRequestHandler.cpp b/lldb/tools/lldb-dap/Handler/ScopesRequestHandler.cpp index aaad0e20f9c21..7d1608f59f9a4 100644 --- a/lldb/tools/lldb-dap/Handler/ScopesRequestHandler.cpp +++ b/lldb/tools/lldb-dap/Handler/ScopesRequestHandler.cpp @@ -7,56 +7,69 @@ //===----------------------------------------------------------------------===// #include "DAP.h" +#include "EventHelper.h" +#include "JSONUtils.h" #include "RequestHandler.h" 
-using namespace lldb_dap::protocol; namespace lldb_dap { -/// Creates a `protocol::Scope` struct. -/// -/// -/// \param[in] name -/// The value to place into the "name" key -/// -/// \param[in] variablesReference -/// The value to place into the "variablesReference" key -/// -/// \param[in] namedVariables -/// The value to place into the "namedVariables" key -/// -/// \param[in] expensive -/// The value to place into the "expensive" key -/// -/// \return -/// A `protocol::Scope` -static Scope CreateScope(const llvm::StringRef name, int64_t variablesReference, - int64_t namedVariables, bool expensive) { - Scope scope; - scope.name = name; - - // TODO: Support "arguments" and "return value" scope. - // At the moment lldb-dap includes the arguments and return_value into the - // "locals" scope. - // vscode only expands the first non-expensive scope, this causes friction - // if we add the arguments above the local scope as the locals scope will not - // be expanded if we enter a function with arguments. It becomes more - // annoying when the scope has arguments, return_value and locals. - if (variablesReference == VARREF_LOCALS) - scope.presentationHint = Scope::eScopePresentationHintLocals; - else if (variablesReference == VARREF_REGS) - scope.presentationHint = Scope::eScopePresentationHintRegisters; - - scope.variablesReference = variablesReference; - scope.namedVariables = namedVariables; - scope.expensive = expensive; - - return scope; -} - -llvm::Expected -ScopesRequestHandler::Run(const ScopesArguments &args) const { - lldb::SBFrame frame = dap.GetLLDBFrame(args.frameId); - +// "ScopesRequest": { +// "allOf": [ { "$ref": "#/definitions/Request" }, { +// "type": "object", +// "description": "Scopes request; value of command field is 'scopes'. 
The +// request returns the variable scopes for a given stackframe ID.", +// "properties": { +// "command": { +// "type": "string", +// "enum": [ "scopes" ] +// }, +// "arguments": { +// "$ref": "#/definitions/ScopesArguments" +// } +// }, +// "required": [ "command", "arguments" ] +// }] +// }, +// "ScopesArguments": { +// "type": "object", +// "description": "Arguments for 'scopes' request.", +// "properties": { +// "frameId": { +// "type": "integer", +// "description": "Retrieve the scopes for this stackframe." +// } +// }, +// "required": [ "frameId" ] +// }, +// "ScopesResponse": { +// "allOf": [ { "$ref": "#/definitions/Response" }, { +// "type": "object", +// "description": "Response to 'scopes' request.", +// "properties": { +// "body": { +// "type": "object", +// "properties": { +// "scopes": { +// "type": "array", +// "items": { +// "$ref": "#/definitions/Scope" +// }, +// "description": "The scopes of the stackframe. If the array has +// length zero, there are no scopes available." +// } +// }, +// "required": [ "scopes" ] +// } +// }, +// "required": [ "body" ] +// }] +// } +void ScopesRequestHandler::operator()(const llvm::json::Object &request) const { + llvm::json::Object response; + FillResponse(request, response); + llvm::json::Object body; + const auto *arguments = request.getObject("arguments"); + lldb::SBFrame frame = dap.GetLLDBFrame(*arguments); // As the user selects different stack frames in the GUI, a "scopes" request // will be sent to the DAP. This is the only way we know that the user has // selected a frame in a thread. There are no other notifications that are @@ -65,9 +78,9 @@ ScopesRequestHandler::Run(const ScopesArguments &args) const { // are sent, this allows users to type commands in the debugger console // with a backtick character to run lldb commands and these lldb commands // will now have the right context selected as they are run. 
If the user - // types "`bt" into the debugger console, and we had another thread selected + // types "`bt" into the debugger console and we had another thread selected // in the LLDB library, we would show the wrong thing to the user. If the - // users switch threads with a lldb command like "`thread select 14", the + // users switches threads with a lldb command like "`thread select 14", the // GUI will not update as there are no "event" notification packets that // allow us to change the currently selected thread or frame in the GUI that // I am aware of. @@ -75,6 +88,7 @@ ScopesRequestHandler::Run(const ScopesArguments &args) const { frame.GetThread().GetProcess().SetSelectedThread(frame.GetThread()); frame.GetThread().SetSelectedFrame(frame.GetFrameID()); } + dap.variables.locals = frame.GetVariables(/*arguments=*/true, /*locals=*/true, /*statics=*/false, @@ -84,15 +98,9 @@ ScopesRequestHandler::Run(const ScopesArguments &args) const { /*statics=*/true, /*in_scope_only=*/true); dap.variables.registers = frame.GetRegisters(); - - std::vector scopes = {CreateScope("Locals", VARREF_LOCALS, - dap.variables.locals.GetSize(), false), - CreateScope("Globals", VARREF_GLOBALS, - dap.variables.globals.GetSize(), false), - CreateScope("Registers", VARREF_REGS, - dap.variables.registers.GetSize(), false)}; - - return ScopesResponseBody{std::move(scopes)}; + body.try_emplace("scopes", dap.CreateTopLevelScopes()); + response.try_emplace("body", std::move(body)); + dap.SendJSON(llvm::json::Value(std::move(response))); } } // namespace lldb_dap diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index a8bd672583a5d..279e6d3d93814 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -416,11 +416,9 @@ llvm::json::Value CreateModule(lldb::SBTarget &target, lldb::SBModule &module, } else { object.try_emplace("symbolStatus", "Symbols not found."); } - std::string load_address = - llvm::formatv("{0:x}", - 
module.GetObjectFileHeaderAddress().GetLoadAddress(target)) - .str(); - object.try_emplace("addressRange", load_address); + std::string loaded_addr = std::to_string( + module.GetObjectFileHeaderAddress().GetLoadAddress(target)); + object.try_emplace("addressRange", loaded_addr); std::string version_str; uint32_t version_nums[3]; uint32_t num_versions = diff --git a/lldb/tools/lldb-dap/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h index 783f291338d8c..9c4dd0584bd21 100644 --- a/lldb/tools/lldb-dap/JSONUtils.h +++ b/lldb/tools/lldb-dap/JSONUtils.h @@ -238,6 +238,27 @@ llvm::json::Object CreateEventObject(const llvm::StringRef event_name); protocol::ExceptionBreakpointsFilter CreateExceptionBreakpointFilter(const ExceptionBreakpoint &bp); +/// Create a "Scope" JSON object as described in the debug adapter definition. +/// +/// \param[in] name +/// The value to place into the "name" key +// +/// \param[in] variablesReference +/// The value to place into the "variablesReference" key +// +/// \param[in] namedVariables +/// The value to place into the "namedVariables" key +// +/// \param[in] expensive +/// The value to place into the "expensive" key +/// +/// \return +/// A "Scope" JSON object with that follows the formal JSON +/// definition outlined by Microsoft. +llvm::json::Value CreateScope(const llvm::StringRef name, + int64_t variablesReference, + int64_t namedVariables, bool expensive); + /// Create a "Source" JSON object as described in the debug adapter definition. 
/// /// \param[in] file diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp index 7efab87d39986..316e146d43a0f 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.cpp @@ -335,20 +335,6 @@ llvm::json::Value toJSON(const SetVariableResponseBody &SVR) { return llvm::json::Value(std::move(Body)); } -bool fromJSON(const llvm::json::Value &Params, ScopesArguments &SCA, - llvm::json::Path P) { - json::ObjectMapper O(Params, P); - return O && O.map("frameId", SCA.frameId); -} - -llvm::json::Value toJSON(const ScopesResponseBody &SCR) { - llvm::json::Array scopes; - for (const Scope &scope : SCR.scopes) { - scopes.emplace_back(toJSON(scope)); - } - - return llvm::json::Object{{"scopes", std::move(scopes)}}; -} bool fromJSON(const json::Value &Params, SourceArguments &SA, json::Path P) { json::ObjectMapper O(Params, P); diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 4e08b4728453b..710fa5d2c57ed 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -439,19 +439,6 @@ struct SetVariableResponseBody { }; llvm::json::Value toJSON(const SetVariableResponseBody &); -struct ScopesArguments { - /// Retrieve the scopes for the stack frame identified by `frameId`. The - /// `frameId` must have been obtained in the current suspended state. See - /// 'Lifetime of Object References' in the Overview section for details. - uint64_t frameId = LLDB_INVALID_FRAME_ID; -}; -bool fromJSON(const llvm::json::Value &, ScopesArguments &, llvm::json::Path); - -struct ScopesResponseBody { - std::vector scopes; -}; -llvm::json::Value toJSON(const ScopesResponseBody &); - /// Arguments for `source` request. struct SourceArguments { /// Specifies the source content to load. 
Either `source.path` or diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp index ce7519e3b16b8..fafd061334bc9 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.cpp @@ -16,18 +16,17 @@ using namespace llvm; namespace lldb_dap::protocol { -bool fromJSON(const json::Value &Params, Source::PresentationHint &PH, - json::Path P) { +bool fromJSON(const json::Value &Params, PresentationHint &PH, json::Path P) { auto rawHint = Params.getAsString(); if (!rawHint) { P.report("expected a string"); return false; } - std::optional hint = - StringSwitch>(*rawHint) - .Case("normal", Source::eSourcePresentationHintNormal) - .Case("emphasize", Source::eSourcePresentationHintEmphasize) - .Case("deemphasize", Source::eSourcePresentationHintDeemphasize) + std::optional hint = + StringSwitch>(*rawHint) + .Case("normal", ePresentationHintNormal) + .Case("emphasize", ePresentationHintEmphasize) + .Case("deemphasize", ePresentationHintDeemphasize) .Default(std::nullopt); if (!hint) { P.report("unexpected value"); @@ -44,13 +43,13 @@ bool fromJSON(const json::Value &Params, Source &S, json::Path P) { O.map("sourceReference", S.sourceReference); } -llvm::json::Value toJSON(Source::PresentationHint hint) { +llvm::json::Value toJSON(PresentationHint hint) { switch (hint) { - case Source::eSourcePresentationHintNormal: + case ePresentationHintNormal: return "normal"; - case Source::eSourcePresentationHintEmphasize: + case ePresentationHintEmphasize: return "emphasize"; - case Source::eSourcePresentationHintDeemphasize: + case ePresentationHintDeemphasize: return "deemphasize"; } llvm_unreachable("unhandled presentation hint."); @@ -106,7 +105,7 @@ bool fromJSON(const json::Value &Params, ColumnType &CT, json::Path P) { .Case("string", eColumnTypeString) .Case("number", eColumnTypeNumber) .Case("boolean", eColumnTypeBoolean) - .Case("unixTimestampUTC", eColumnTypeTimestamp) + 
.Case("unixTimestampUTC ", eColumnTypeTimestamp) .Default(std::nullopt); if (!columnType) { P.report("unexpected value, expected 'string', 'number', 'boolean', or " @@ -436,90 +435,6 @@ json::Value toJSON(const Capabilities &C) { return result; } -bool fromJSON(const json::Value &Params, Scope::PresentationHint &PH, - json::Path P) { - auto rawHint = Params.getAsString(); - if (!rawHint) { - P.report("expected a string"); - return false; - } - const std::optional hint = - StringSwitch>(*rawHint) - .Case("arguments", Scope::eScopePresentationHintArguments) - .Case("locals", Scope::eScopePresentationHintLocals) - .Case("registers", Scope::eScopePresentationHintRegisters) - .Case("returnValue", Scope::eScopePresentationHintReturnValue) - .Default(std::nullopt); - if (!hint) { - P.report("unexpected value"); - return false; - } - PH = *hint; - return true; -} - -bool fromJSON(const json::Value &Params, Scope &S, json::Path P) { - json::ObjectMapper O(Params, P); - return O && O.map("name", S.name) && - O.mapOptional("presentationHint", S.presentationHint) && - O.map("variablesReference", S.variablesReference) && - O.mapOptional("namedVariables", S.namedVariables) && - O.map("indexedVariables", S.indexedVariables) && - O.mapOptional("source", S.source) && O.map("expensive", S.expensive) && - O.mapOptional("line", S.line) && O.mapOptional("column", S.column) && - O.mapOptional("endLine", S.endLine) && - O.mapOptional("endColumn", S.endColumn); -} - -llvm::json::Value toJSON(const Scope &SC) { - llvm::json::Object result{{"name", SC.name}, - {"variablesReference", SC.variablesReference}, - {"expensive", SC.expensive}}; - - if (SC.presentationHint.has_value()) { - llvm::StringRef presentationHint; - switch (*SC.presentationHint) { - case Scope::eScopePresentationHintArguments: - presentationHint = "arguments"; - break; - case Scope::eScopePresentationHintLocals: - presentationHint = "locals"; - break; - case Scope::eScopePresentationHintRegisters: - presentationHint = 
"registers"; - break; - case Scope::eScopePresentationHintReturnValue: - presentationHint = "returnValue"; - break; - } - - result.insert({"presentationHint", presentationHint}); - } - - if (SC.namedVariables.has_value()) - result.insert({"namedVariables", SC.namedVariables}); - - if (SC.indexedVariables.has_value()) - result.insert({"indexedVariables", SC.indexedVariables}); - - if (SC.source.has_value()) - result.insert({"source", SC.source}); - - if (SC.line.has_value()) - result.insert({"line", SC.line}); - - if (SC.column.has_value()) - result.insert({"column", SC.column}); - - if (SC.endLine.has_value()) - result.insert({"endLine", SC.endLine}); - - if (SC.endColumn.has_value()) - result.insert({"endColumn", SC.endColumn}); - - return result; -} - bool fromJSON(const llvm::json::Value &Params, Capabilities &C, llvm::json::Path P) { auto *Object = Params.getAsObject(); @@ -567,18 +482,6 @@ bool fromJSON(const llvm::json::Value &Params, SteppingGranularity &SG, return true; } -llvm::json::Value toJSON(const SteppingGranularity &SG) { - switch (SG) { - case eSteppingGranularityStatement: - return "statement"; - case eSteppingGranularityLine: - return "line"; - case eSteppingGranularityInstruction: - return "instruction"; - } - llvm_unreachable("unhandled stepping granularity."); -} - bool fromJSON(const llvm::json::Value &Params, ValueFormat &VF, llvm::json::Path P) { json::ObjectMapper O(Params, P); diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h index 3df77ee7374a7..f8d2b35ce3e14 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h @@ -27,8 +27,6 @@ #include #include -#define LLDB_DAP_INVALID_VARRERF UINT64_MAX - namespace lldb_dap::protocol { /// An `ExceptionBreakpointsFilter` is shown in the UI as an filter option for @@ -285,16 +283,18 @@ struct Capabilities { bool fromJSON(const llvm::json::Value &, Capabilities &, llvm::json::Path); 
llvm::json::Value toJSON(const Capabilities &); +enum PresentationHint : unsigned { + ePresentationHintNormal, + ePresentationHintEmphasize, + ePresentationHintDeemphasize +}; +bool fromJSON(const llvm::json::Value &, PresentationHint &, llvm::json::Path); +llvm::json::Value toJSON(PresentationHint hint); + /// A `Source` is a descriptor for source code. It is returned from the debug /// adapter as part of a `StackFrame` and it is used by clients when specifying /// breakpoints. struct Source { - enum PresentationHint : unsigned { - eSourcePresentationHintNormal, - eSourcePresentationHintEmphasize, - eSourcePresentationHintDeemphasize, - }; - /// The short name of the source. Every source returned from the debug adapter /// has a name. When sending a source to the debug adapter this name is /// optional. @@ -318,82 +318,9 @@ struct Source { // unsupported keys: origin, sources, adapterData, checksums }; -bool fromJSON(const llvm::json::Value &, Source::PresentationHint &, - llvm::json::Path); -llvm::json::Value toJSON(Source::PresentationHint); bool fromJSON(const llvm::json::Value &, Source &, llvm::json::Path); llvm::json::Value toJSON(const Source &); -/// A `Scope` is a named container for variables. Optionally a scope can map to -/// a source or a range within a source. -struct Scope { - enum PresentationHint : unsigned { - eScopePresentationHintArguments, - eScopePresentationHintLocals, - eScopePresentationHintRegisters, - eScopePresentationHintReturnValue - }; - /// Name of the scope such as 'Arguments', 'Locals', or 'Registers'. This - /// string is shown in the UI as is and can be translated. - //// - std::string name; - - /// A hint for how to present this scope in the UI. If this attribute is - /// missing, the scope is shown with a generic UI. - /// Values: - /// 'arguments': Scope contains method arguments. - /// 'locals': Scope contains local variables. - /// 'registers': Scope contains registers. 
Only a single `registers` scope - /// should be returned from a `scopes` request. - /// 'returnValue': Scope contains one or more return values. - /// etc. - std::optional presentationHint; - - /// The variables of this scope can be retrieved by passing the value of - /// `variablesReference` to the `variables` request as long as execution - /// remains suspended. See 'Lifetime of Object References' in the Overview - /// section for details. - //// - uint64_t variablesReference = LLDB_DAP_INVALID_VARRERF; - - /// The number of named variables in this scope. - /// The client can use this information to present the variables in a paged UI - /// and fetch them in chunks. - std::optional namedVariables; - - /// The number of indexed variables in this scope. - /// The client can use this information to present the variables in a paged UI - /// and fetch them in chunks. - std::optional indexedVariables; - - /// The source for this scope. - std::optional source; - - /// If true, the number of variables in this scope is large or expensive to - /// retrieve. - bool expensive = false; - - /// The start line of the range covered by this scope. - std::optional line; - - /// Start position of the range covered by the scope. It is measured in UTF-16 - /// code units and the client capability `columnsStartAt1` determines whether - /// it is 0- or 1-based. - std::optional column; - - /// The end line of the range covered by this scope. - std::optional endLine; - - /// End position of the range covered by the scope. It is measured in UTF-16 - /// code units and the client capability `columnsStartAt1` determines whether - /// it is 0- or 1-based. 
- std::optional endColumn; -}; -bool fromJSON(const llvm::json::Value &Params, Scope::PresentationHint &PH, - llvm::json::Path); -bool fromJSON(const llvm::json::Value &, Scope &, llvm::json::Path); -llvm::json::Value toJSON(const Scope &); - /// The granularity of one `step` in the stepping requests `next`, `stepIn`, /// `stepOut` and `stepBack`. enum SteppingGranularity : unsigned { @@ -412,7 +339,6 @@ enum SteppingGranularity : unsigned { }; bool fromJSON(const llvm::json::Value &, SteppingGranularity &, llvm::json::Path); -llvm::json::Value toJSON(const SteppingGranularity &); /// Provides formatting information for a value. struct ValueFormat { diff --git a/lldb/tools/lldb-dap/package.json b/lldb/tools/lldb-dap/package.json index d5ca604798799..e3e46526f379f 100644 --- a/lldb/tools/lldb-dap/package.json +++ b/lldb/tools/lldb-dap/package.json @@ -244,26 +244,6 @@ } } ], - "commands": [ - { - "command": "lldb-dap.modules.copyProperty", - "title": "Copy Value" - } - ], - "menus": { - "commandPalette": [ - { - "command": "lldb-dap.modules.copyProperty", - "when": "false" - } - ], - "view/item/context": [ - { - "command": "lldb-dap.modules.copyProperty", - "when": "view == lldb-dap.modules && viewItem == property" - } - ] - }, "breakpoints": [ { "language": "ada" diff --git a/lldb/tools/lldb-dap/src-ts/extension.ts b/lldb/tools/lldb-dap/src-ts/extension.ts index c8e5146e29cea..a5c0a09ae60cf 100644 --- a/lldb/tools/lldb-dap/src-ts/extension.ts +++ b/lldb/tools/lldb-dap/src-ts/extension.ts @@ -6,10 +6,7 @@ import { LaunchUriHandler } from "./uri-launch-handler"; import { LLDBDapConfigurationProvider } from "./debug-configuration-provider"; import { LLDBDapServer } from "./lldb-dap-server"; import { DebugSessionTracker } from "./debug-session-tracker"; -import { - ModulesDataProvider, - ModuleProperty, -} from "./ui/modules-data-provider"; +import { ModulesDataProvider } from "./ui/modules-data-provider"; /** * This class represents the extension and manages its life 
cycle. Other extensions @@ -43,11 +40,6 @@ export class LLDBDapExtension extends DisposableContext { ), vscode.window.registerUriHandler(new LaunchUriHandler()), ); - - vscode.commands.registerCommand( - "lldb-dap.modules.copyProperty", - (node: ModuleProperty) => vscode.env.clipboard.writeText(node.value), - ); } } diff --git a/lldb/tools/lldb-dap/src-ts/ui/modules-data-provider.ts b/lldb/tools/lldb-dap/src-ts/ui/modules-data-provider.ts index 091c1d69ac647..478c162de8878 100644 --- a/lldb/tools/lldb-dap/src-ts/ui/modules-data-provider.ts +++ b/lldb/tools/lldb-dap/src-ts/ui/modules-data-provider.ts @@ -2,51 +2,10 @@ import * as vscode from "vscode"; import { DebugProtocol } from "@vscode/debugprotocol"; import { DebugSessionTracker } from "../debug-session-tracker"; -export interface ModuleProperty { - key: string; - value: string; -} - -/** Type to represent both Module and ModuleProperty since TreeDataProvider - * expects one concrete type */ -type TreeData = DebugProtocol.Module | ModuleProperty; - -function isModule(type: TreeData): type is DebugProtocol.Module { - return (type as DebugProtocol.Module).id !== undefined; -} - -class ModuleItem extends vscode.TreeItem { - constructor(module: DebugProtocol.Module) { - super(module.name, vscode.TreeItemCollapsibleState.Collapsed); - this.description = module.symbolStatus; - } - - static getProperties(module: DebugProtocol.Module): ModuleProperty[] { - // does not include the name and symbol status as it is show in the parent. 
- let children: ModuleProperty[] = []; - children.push({ key: "id:", value: module.id.toString() }); - - if (module.addressRange) { - children.push({ - key: "load address:", - value: module.addressRange, - }); - } - if (module.path) { - children.push({ key: "path:", value: module.path }); - } - if (module.version) { - children.push({ key: "version:", value: module.version }); - } - if (module.symbolFilePath) { - children.push({ key: "symbol filepath:", value: module.symbolFilePath }); - } - return children; - } -} - /** A tree data provider for listing loaded modules for the active debug session. */ -export class ModulesDataProvider implements vscode.TreeDataProvider { +export class ModulesDataProvider + implements vscode.TreeDataProvider +{ private changeTreeData = new vscode.EventEmitter(); readonly onDidChangeTreeData = this.changeTreeData.event; @@ -57,31 +16,46 @@ export class ModulesDataProvider implements vscode.TreeDataProvider { ); } - getTreeItem(module: TreeData): vscode.TreeItem { - if (isModule(module)) { - return new ModuleItem(module); + getTreeItem(module: DebugProtocol.Module): vscode.TreeItem { + let treeItem = new vscode.TreeItem(/*label=*/ module.name); + if (module.path) { + treeItem.description = `${module.id} -- ${module.path}`; + } else { + treeItem.description = `${module.id}`; } - let item = new vscode.TreeItem(module.key); - item.description = module.value; - item.tooltip = `${module.key} ${module.value}`; - item.contextValue = "property"; - return item; + const tooltip = new vscode.MarkdownString(); + tooltip.appendMarkdown(`# ${module.name}\n\n`); + tooltip.appendMarkdown(`- **ID**: ${module.id}\n`); + if (module.addressRange) { + tooltip.appendMarkdown( + `- **Load address**: 0x${Number(module.addressRange).toString(16)}\n`, + ); + } + if (module.path) { + tooltip.appendMarkdown(`- **Path**: ${module.path}\n`); + } + if (module.version) { + tooltip.appendMarkdown(`- **Version**: ${module.version}\n`); + } + if (module.symbolStatus) { + 
tooltip.appendMarkdown(`- **Symbol status**: ${module.symbolStatus}\n`); + } + if (module.symbolFilePath) { + tooltip.appendMarkdown( + `- **Symbol file path**: ${module.symbolFilePath}\n`, + ); + } + + treeItem.tooltip = tooltip; + return treeItem; } - getChildren(element?: TreeData): TreeData[] { + getChildren(): DebugProtocol.Module[] { if (!vscode.debug.activeDebugSession) { return []; } - if (!element) { - return this.tracker.debugSessionModules(vscode.debug.activeDebugSession); - } - - if (isModule(element)) { - return ModuleItem.getProperties(element); - } - - return []; + return this.tracker.debugSessionModules(vscode.debug.activeDebugSession); } } diff --git a/lldb/unittests/DAP/CMakeLists.txt b/lldb/unittests/DAP/CMakeLists.txt index af7d11e2e95e2..8b240654046e2 100644 --- a/lldb/unittests/DAP/CMakeLists.txt +++ b/lldb/unittests/DAP/CMakeLists.txt @@ -1,11 +1,7 @@ add_lldb_unittest(DAPTests - DAPTest.cpp - Handler/DisconnectTest.cpp JSONUtilsTest.cpp LLDBUtilsTest.cpp ProtocolTypesTest.cpp - TestBase.cpp - TransportTest.cpp LINK_LIBS lldbDAP diff --git a/lldb/unittests/DAP/DAPTest.cpp b/lldb/unittests/DAP/DAPTest.cpp deleted file mode 100644 index 5fb6bf7e564ab..0000000000000 --- a/lldb/unittests/DAP/DAPTest.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===-- DAPTest.cpp -------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "DAP.h" -#include "Protocol/ProtocolBase.h" -#include "TestBase.h" -#include "Transport.h" -#include "llvm/Testing/Support/Error.h" -#include "gtest/gtest.h" -#include -#include -#include - -using namespace llvm; -using namespace lldb; -using namespace lldb_dap; -using namespace lldb_dap_tests; -using namespace lldb_dap::protocol; - -class DAPTest : public TransportBase {}; - -TEST_F(DAPTest, SendProtocolMessages) { - DAP dap{ - /*log=*/nullptr, - /*default_repl_mode=*/ReplMode::Auto, - /*pre_init_commands=*/{}, - /*transport=*/*to_dap, - }; - dap.Send(Event{/*event=*/"my-event", /*body=*/std::nullopt}); - ASSERT_THAT_EXPECTED(from_dap->Read(std::chrono::milliseconds(1)), - HasValue(testing::VariantWith(testing::FieldsAre( - /*event=*/"my-event", /*body=*/std::nullopt)))); -} diff --git a/lldb/unittests/DAP/Handler/DisconnectTest.cpp b/lldb/unittests/DAP/Handler/DisconnectTest.cpp deleted file mode 100644 index 6f3470239e974..0000000000000 --- a/lldb/unittests/DAP/Handler/DisconnectTest.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===-- DisconnectTest.cpp ------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "DAP.h" -#include "Handler/RequestHandler.h" -#include "Protocol/ProtocolBase.h" -#include "TestBase.h" -#include "llvm/Testing/Support/Error.h" -#include "gtest/gtest.h" -#include -#include - -using namespace llvm; -using namespace lldb; -using namespace lldb_dap; -using namespace lldb_dap_tests; -using namespace lldb_dap::protocol; - -class DisconnectRequestHandlerTest : public DAPTestBase {}; - -TEST_F(DisconnectRequestHandlerTest, DisconnectingTriggersTerminated) { - DisconnectRequestHandler handler(*dap); - EXPECT_FALSE(dap->disconnecting); - ASSERT_THAT_ERROR(handler.Run(std::nullopt), Succeeded()); - EXPECT_TRUE(dap->disconnecting); - std::vector messages = DrainOutput(); - EXPECT_THAT(messages, - testing::Contains(testing::VariantWith(testing::FieldsAre( - /*event=*/"terminated", /*body=*/std::nullopt)))); -} diff --git a/lldb/unittests/DAP/ProtocolTypesTest.cpp b/lldb/unittests/DAP/ProtocolTypesTest.cpp index 5d5125dc78fba..fd3e3be073183 100644 --- a/lldb/unittests/DAP/ProtocolTypesTest.cpp +++ b/lldb/unittests/DAP/ProtocolTypesTest.cpp @@ -50,7 +50,7 @@ TEST(ProtocolTypesTest, Source) { source.name = "testName"; source.path = "/path/to/source"; source.sourceReference = 12345; - source.presentationHint = Source::eSourcePresentationHintEmphasize; + source.presentationHint = ePresentationHintEmphasize; llvm::Expected deserialized_source = roundtrip(source); ASSERT_THAT_EXPECTED(deserialized_source, llvm::Succeeded()); @@ -101,8 +101,8 @@ TEST(ProtocolTypesTest, Breakpoint) { breakpoint.id = 42; breakpoint.verified = true; breakpoint.message = "Breakpoint set successfully"; - breakpoint.source = Source{"test.cpp", "/path/to/test.cpp", 123, - Source::eSourcePresentationHintNormal}; + breakpoint.source = + Source{"test.cpp", "/path/to/test.cpp", 123, ePresentationHintNormal}; breakpoint.line = 10; 
breakpoint.column = 5; breakpoint.endLine = 15; @@ -291,242 +291,3 @@ TEST(ProtocolTypesTest, Capabilities) { EXPECT_EQ(capabilities.lldbExtVersion, deserialized_capabilities->lldbExtVersion); } - -TEST(ProtocolTypesTest, Scope) { - Scope scope; - scope.name = "Locals"; - scope.presentationHint = Scope::eScopePresentationHintLocals; - scope.variablesReference = 1; - scope.namedVariables = 2; - scope.indexedVariables = std::nullopt; - scope.expensive = false; - scope.line = 2; - scope.column = 3; - scope.endLine = 10; - scope.endColumn = 20; - - Source source; - source.name = "testName"; - source.path = "/path/to/source"; - source.sourceReference = 12345; - source.presentationHint = Source::eSourcePresentationHintNormal; - scope.source = source; - - llvm::Expected deserialized_scope = roundtrip(scope); - ASSERT_THAT_EXPECTED(deserialized_scope, llvm::Succeeded()); - EXPECT_EQ(scope.name, deserialized_scope->name); - EXPECT_EQ(scope.presentationHint, deserialized_scope->presentationHint); - EXPECT_EQ(scope.variablesReference, deserialized_scope->variablesReference); - EXPECT_EQ(scope.namedVariables, deserialized_scope->namedVariables); - EXPECT_EQ(scope.indexedVariables, deserialized_scope->indexedVariables); - EXPECT_EQ(scope.expensive, deserialized_scope->expensive); - EXPECT_EQ(scope.line, deserialized_scope->line); - EXPECT_EQ(scope.column, deserialized_scope->column); - EXPECT_EQ(scope.endLine, deserialized_scope->endLine); - EXPECT_EQ(scope.endColumn, deserialized_scope->endColumn); - - EXPECT_THAT(deserialized_scope->source.has_value(), true); - const Source &deserialized_source = deserialized_scope->source.value(); - - EXPECT_EQ(source.path, deserialized_source.path); - EXPECT_EQ(source.sourceReference, deserialized_source.sourceReference); - EXPECT_EQ(source.presentationHint, deserialized_source.presentationHint); -} - -TEST(ProtocolTypesTest, PresentationHint) { - // Test all PresentationHint values. 
- std::vector> test_cases = - {{Source::eSourcePresentationHintNormal, "normal"}, - {Source::eSourcePresentationHintEmphasize, "emphasize"}, - {Source::eSourcePresentationHintDeemphasize, "deemphasize"}}; - - for (const auto &test_case : test_cases) { - // Serialize the PresentationHint to JSON. - llvm::json::Value serialized = toJSON(test_case.first); - ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String); - EXPECT_EQ(serialized.getAsString(), test_case.second); - - // Deserialize the JSON back to PresentationHint. - Source::PresentationHint deserialized; - llvm::json::Path::Root root; - ASSERT_TRUE(fromJSON(serialized, deserialized, root)) - << llvm::toString(root.getError()); - EXPECT_EQ(deserialized, test_case.first); - } - - // Test invalid value. - llvm::json::Value invalid_value = "invalid_hint"; - Source::PresentationHint deserialized_invalid; - llvm::json::Path::Root root; - EXPECT_FALSE(fromJSON(invalid_value, deserialized_invalid, root)); -} - -TEST(ProtocolTypesTest, SteppingGranularity) { - // Test all SteppingGranularity values. - std::vector> test_cases = { - {eSteppingGranularityStatement, "statement"}, - {eSteppingGranularityLine, "line"}, - {eSteppingGranularityInstruction, "instruction"}}; - - for (const auto &test_case : test_cases) { - // Serialize the SteppingGranularity to JSON. - llvm::json::Value serialized = toJSON(test_case.first); - ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String); - EXPECT_EQ(serialized.getAsString(), test_case.second); - - // Deserialize the JSON back to SteppingGranularity. - SteppingGranularity deserialized; - llvm::json::Path::Root root; - ASSERT_TRUE(fromJSON(serialized, deserialized, root)) - << llvm::toString(root.getError()); - EXPECT_EQ(deserialized, test_case.first); - } - - // Test invalid value. 
- llvm::json::Value invalid_value = "invalid_granularity"; - SteppingGranularity deserialized_invalid; - llvm::json::Path::Root root; - EXPECT_FALSE(fromJSON(invalid_value, deserialized_invalid, root)); -} - -TEST(ProtocolTypesTest, BreakpointReason) { - // Test all BreakpointReason values. - std::vector> test_cases = { - {BreakpointReason::eBreakpointReasonPending, "pending"}, - {BreakpointReason::eBreakpointReasonFailed, "failed"}}; - - for (const auto &test_case : test_cases) { - // Serialize the BreakpointReason to JSON. - llvm::json::Value serialized = toJSON(test_case.first); - ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String); - EXPECT_EQ(serialized.getAsString(), test_case.second); - - // Deserialize the JSON back to BreakpointReason. - BreakpointReason deserialized; - llvm::json::Path::Root root; - ASSERT_TRUE(fromJSON(serialized, deserialized, root)) - << llvm::toString(root.getError()); - EXPECT_EQ(deserialized, test_case.first); - } - - // Test invalid value. - llvm::json::Value invalid_value = "invalid_reason"; - BreakpointReason deserialized_invalid; - llvm::json::Path::Root root; - EXPECT_FALSE(fromJSON(invalid_value, deserialized_invalid, root)); -} - -TEST(ProtocolTypesTest, DataBreakpointAccessType) { - // Test all DataBreakpointAccessType values. - std::vector> test_cases = - {{eDataBreakpointAccessTypeRead, "read"}, - {eDataBreakpointAccessTypeWrite, "write"}, - {eDataBreakpointAccessTypeReadWrite, "readWrite"}}; - - for (const auto &test_case : test_cases) { - // Serialize the DataBreakpointAccessType to JSON. - llvm::json::Value serialized = toJSON(test_case.first); - ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String); - EXPECT_EQ(serialized.getAsString(), test_case.second); - - // Deserialize the JSON back to DataBreakpointAccessType. 
- DataBreakpointAccessType deserialized; - llvm::json::Path::Root root; - ASSERT_TRUE(fromJSON(serialized, deserialized, root)) - << llvm::toString(root.getError()); - EXPECT_EQ(deserialized, test_case.first); - } - - // Test invalid value - llvm::json::Value invalid_value = "invalid_access_type"; - DataBreakpointAccessType deserialized_invalid; - llvm::json::Path::Root root; - EXPECT_FALSE(fromJSON(invalid_value, deserialized_invalid, root)); -} - -TEST(ProtocolTypesTest, ColumnType) { - // Test all ColumnType values. - std::vector> test_cases = { - {eColumnTypeString, "string"}, - {eColumnTypeNumber, "number"}, - {eColumnTypeBoolean, "boolean"}, - {eColumnTypeTimestamp, "unixTimestampUTC"}}; - - for (const auto &test_case : test_cases) { - // Serialize the ColumnType to JSON. - llvm::json::Value serialized = toJSON(test_case.first); - ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String); - EXPECT_EQ(serialized.getAsString(), test_case.second); - - // Deserialize the JSON back to ColumnType. - ColumnType deserialized; - llvm::json::Path::Root root; - ASSERT_TRUE(fromJSON(serialized, deserialized, root)) - << llvm::toString(root.getError()); - EXPECT_EQ(deserialized, test_case.first); - } - - // Test invalid value. - llvm::json::Value invalid_value = "invalid_column_type"; - ColumnType deserialized_invalid; - llvm::json::Path::Root root; - EXPECT_FALSE(fromJSON(invalid_value, deserialized_invalid, root)); -} - -TEST(ProtocolTypesTest, BreakpointModeApplicability) { - // Test all BreakpointModeApplicability values. - std::vector> - test_cases = {{eBreakpointModeApplicabilitySource, "source"}, - {eBreakpointModeApplicabilityException, "exception"}, - {eBreakpointModeApplicabilityData, "data"}, - {eBreakpointModeApplicabilityInstruction, "instruction"}}; - - for (const auto &test_case : test_cases) { - // Serialize the BreakpointModeApplicability to JSON. 
- llvm::json::Value serialized = toJSON(test_case.first); - ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String); - EXPECT_EQ(serialized.getAsString(), test_case.second); - - // Deserialize the JSON back to BreakpointModeApplicability. - BreakpointModeApplicability deserialized; - llvm::json::Path::Root root; - ASSERT_TRUE(fromJSON(serialized, deserialized, root)) - << llvm::toString(root.getError()); - EXPECT_EQ(deserialized, test_case.first); - } - - // Test invalid value. - llvm::json::Value invalid_value = "invalid_applicability"; - BreakpointModeApplicability deserialized_invalid; - llvm::json::Path::Root root; - EXPECT_FALSE(fromJSON(invalid_value, deserialized_invalid, root)); -} - -TEST(ProtocolTypesTest, ChecksumAlgorithm) { - // Test all ChecksumAlgorithm values. - std::vector> test_cases = { - {eChecksumAlgorithmMD5, "MD5"}, - {eChecksumAlgorithmSHA1, "SHA1"}, - {eChecksumAlgorithmSHA256, "SHA256"}, - {eChecksumAlgorithmTimestamp, "timestamp"}}; - - for (const auto &test_case : test_cases) { - // Serialize the ChecksumAlgorithm to JSON. - llvm::json::Value serialized = toJSON(test_case.first); - ASSERT_EQ(serialized.kind(), llvm::json::Value::Kind::String); - EXPECT_EQ(serialized.getAsString(), test_case.second); - - // Deserialize the JSON back to ChecksumAlgorithm. - ChecksumAlgorithm deserialized; - llvm::json::Path::Root root; - ASSERT_TRUE(fromJSON(serialized, deserialized, root)) - << llvm::toString(root.getError()); - EXPECT_EQ(deserialized, test_case.first); - } - - // Test invalid value. 
- llvm::json::Value invalid_value = "invalid_algorithm"; - ChecksumAlgorithm deserialized_invalid; - llvm::json::Path::Root root; - EXPECT_FALSE(fromJSON(invalid_value, deserialized_invalid, root)); -} diff --git a/lldb/unittests/DAP/TestBase.cpp b/lldb/unittests/DAP/TestBase.cpp deleted file mode 100644 index eb146cb2fa9f4..0000000000000 --- a/lldb/unittests/DAP/TestBase.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===-- TestBase.cpp ------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "TestBase.h" -#include "Protocol/ProtocolBase.h" -#include "lldb/Host/File.h" -#include "lldb/Host/Pipe.h" -#include "llvm/Testing/Support/Error.h" - -using namespace llvm; -using namespace lldb; -using namespace lldb_dap; -using namespace lldb_dap::protocol; -using namespace lldb_dap_tests; -using lldb_private::File; -using lldb_private::NativeFile; -using lldb_private::Pipe; - -void PipeBase::SetUp() { - ASSERT_THAT_ERROR(input.CreateNew(false).ToError(), Succeeded()); - ASSERT_THAT_ERROR(output.CreateNew(false).ToError(), Succeeded()); -} - -void TransportBase::SetUp() { - PipeBase::SetUp(); - to_dap = std::make_unique( - "to_dap", nullptr, - std::make_shared(input.GetReadFileDescriptor(), - File::eOpenOptionReadOnly, - NativeFile::Unowned), - std::make_shared(output.GetWriteFileDescriptor(), - File::eOpenOptionWriteOnly, - NativeFile::Unowned)); - from_dap = std::make_unique( - "from_dap", nullptr, - std::make_shared(output.GetReadFileDescriptor(), - File::eOpenOptionReadOnly, - NativeFile::Unowned), - std::make_shared(input.GetWriteFileDescriptor(), - File::eOpenOptionWriteOnly, - NativeFile::Unowned)); -} - -void DAPTestBase::SetUp() { - TransportBase::SetUp(); - dap = 
std::make_unique( - /*log=*/nullptr, - /*default_repl_mode=*/ReplMode::Auto, - /*pre_init_commands=*/std::vector(), - /*transport=*/*to_dap); -} - -std::vector DAPTestBase::DrainOutput() { - std::vector msgs; - output.CloseWriteFileDescriptor(); - while (true) { - Expected next = from_dap->Read(std::chrono::milliseconds(1)); - if (!next) { - consumeError(next.takeError()); - break; - } - msgs.push_back(*next); - } - return msgs; -} diff --git a/lldb/unittests/DAP/TestBase.h b/lldb/unittests/DAP/TestBase.h deleted file mode 100644 index c789adf53c225..0000000000000 --- a/lldb/unittests/DAP/TestBase.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- TestBase.cpp ------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "DAP.h" -#include "Protocol/ProtocolBase.h" -#include "Transport.h" -#include "lldb/Host/Pipe.h" -#include "gtest/gtest.h" - -namespace lldb_dap_tests { - -/// A base class for tests that need a pair of pipes for communication. -class PipeBase : public testing::Test { -protected: - lldb_private::Pipe input; - lldb_private::Pipe output; - - void SetUp() override; -}; - -/// A base class for tests that need transport configured for communicating DAP -/// messages. -class TransportBase : public PipeBase { -protected: - std::unique_ptr to_dap; - std::unique_ptr from_dap; - - void SetUp() override; -}; - -/// A base class for tests that interact with a `lldb_dap::DAP` instance. -class DAPTestBase : public TransportBase { -protected: - std::unique_ptr dap; - - void SetUp() override; - - /// Closes the DAP output pipe and returns the remaining protocol messages in - /// the buffer. 
- std::vector DrainOutput(); -}; - -} // namespace lldb_dap_tests diff --git a/lldb/unittests/DAP/TransportTest.cpp b/lldb/unittests/DAP/TransportTest.cpp deleted file mode 100644 index e6dab42e30941..0000000000000 --- a/lldb/unittests/DAP/TransportTest.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- TransportTest.cpp -------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Transport.h" -#include "Protocol/ProtocolBase.h" -#include "TestBase.h" -#include "lldb/Host/File.h" -#include "lldb/Host/Pipe.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Testing/Support/Error.h" -#include "gtest/gtest.h" -#include -#include -#include - -using namespace llvm; -using namespace lldb; -using namespace lldb_dap; -using namespace lldb_dap_tests; -using namespace lldb_dap::protocol; -using lldb_private::File; -using lldb_private::NativeFile; -using lldb_private::Pipe; - -class TransportTest : public PipeBase { -protected: - std::unique_ptr transport; - - void SetUp() override { - PipeBase::SetUp(); - transport = std::make_unique( - "stdio", nullptr, - std::make_shared(input.GetReadFileDescriptor(), - File::eOpenOptionReadOnly, - NativeFile::Unowned), - std::make_shared(output.GetWriteFileDescriptor(), - File::eOpenOptionWriteOnly, - NativeFile::Unowned)); - } -}; - -TEST_F(TransportTest, MalformedRequests) { - std::string malformed_header = "COnTent-LenGth: -1{}\r\n\r\nnotjosn"; - ASSERT_THAT_EXPECTED( - input.Write(malformed_header.data(), malformed_header.size()), - Succeeded()); - ASSERT_THAT_EXPECTED( - transport->Read(std::chrono::milliseconds(1)), - FailedWithMessage( - "expected 'Content-Length: ' and got 'COnTent-LenGth: '")); -} - -TEST_F(TransportTest, Read) { - 
std::string json = - R"json({"seq": 1, "type": "request", "command": "abc"})json"; - std::string message = - formatv("Content-Length: {0}\r\n\r\n{1}", json.size(), json).str(); - ASSERT_THAT_EXPECTED(input.Write(message.data(), message.size()), - Succeeded()); - ASSERT_THAT_EXPECTED( - transport->Read(std::chrono::milliseconds(1)), - HasValue(testing::VariantWith(testing::FieldsAre( - /*seq=*/1, /*command=*/"abc", /*arguments=*/std::nullopt)))); -} - -TEST_F(TransportTest, ReadWithTimeout) { - ASSERT_THAT_EXPECTED(transport->Read(std::chrono::milliseconds(1)), - Failed()); -} - -TEST_F(TransportTest, ReadWithEOF) { - input.CloseWriteFileDescriptor(); - ASSERT_THAT_EXPECTED(transport->Read(std::chrono::milliseconds(1)), - Failed()); -} - -TEST_F(TransportTest, Write) { - ASSERT_THAT_ERROR(transport->Write(Event{"my-event", std::nullopt}), - Succeeded()); - output.CloseWriteFileDescriptor(); - char buf[1024]; - Expected bytes_read = - output.Read(buf, sizeof(buf), std::chrono::milliseconds(1)); - ASSERT_THAT_EXPECTED(bytes_read, Succeeded()); - ASSERT_EQ( - StringRef(buf, *bytes_read), - StringRef("Content-Length: 43\r\n\r\n" - R"json({"event":"my-event","seq":0,"type":"event"})json")); -} diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 2912f45953c41..c427a65ee030c 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -1240,8 +1240,6 @@ if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE}) else() message(FATAL_ERROR "LLVM_PROFDATA_FILE can only be specified when compiling with clang") endif() -elseif(LLVM_PROFDATA_FILE) - message(WARNING "LLVM_PROFDATA_FILE specified, but ${LLVM_PROFDATA_FILE} not found") endif() option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off) diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 55b168b20d0e7..d1535960a0257 100644 --- a/llvm/docs/AMDGPUUsage.rst 
+++ b/llvm/docs/AMDGPUUsage.rst @@ -394,12 +394,12 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following **GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_ ----------------------------------------------------------------------------------------------------------------------- - ``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon Pro 5600 XT - - wavefrontsize64 flat - *pal-amdhsa* - Radeon RX 5600M - - xnack scratch - *pal-amdpal* - Radeon RX 5700 - - Radeon RX 5700 XT + ``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700 + - wavefrontsize64 flat - *pal-amdhsa* - Radeon RX 5700 XT + - xnack scratch - *pal-amdpal* - Radeon Pro 5600 XT + - Radeon Pro 5600M ``gfx1011`` ``amdgcn`` dGPU - cumode - *rocm-amdhsa* - Radeon Pro V520 - - wavefrontsize64 - Absolute - *pal-amdhsa* - Radeon Pro 5600M + - wavefrontsize64 - Absolute - *pal-amdhsa* - xnack flat - *pal-amdpal* scratch ``gfx1012`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5500 diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index fd627a694b818..5f14726c36672 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3133,9 +3133,6 @@ as follows: program memory space defaults to the default address space of 0, which corresponds to a Von Neumann architecture that has code and data in the same space. - -.. _globals_addrspace: - ``G
`` Specifies the address space to be used by default when creating global variables. If omitted, the globals address space defaults to the default @@ -15064,8 +15061,7 @@ Syntax: :: - declare ptr @llvm.thread.pointer.p0() - declare ptr addrspace(5) @llvm.thread.pointer.p5() + declare ptr @llvm.thread.pointer() Overview: """"""""" @@ -15082,8 +15078,7 @@ specific: it may point to the start of TLS area, to the end, or somewhere in the middle. Depending on the target, this intrinsic may read a register, call a helper function, read from an alternate memory space, or perform other operations necessary to locate the TLS area. Not all targets support -this intrinsic. The address space must be the :ref:`globals address space -`. +this intrinsic. '``llvm.call.preallocated.setup``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -17200,14 +17195,12 @@ type. Semantics: """""""""" +If both operands are NaNs (including sNaN), returns qNaN. If one operand +is NaN (including sNaN) and another operand is a number, return the number. +Otherwise returns the lesser of the two arguments. -0.0 is considered to +be less than +0.0 for this intrinsic. -If both operands are NaNs (including sNaN), returns a :ref:`NaN `. If -one operand is NaN (including sNaN) and another operand is a number, -return the number. Otherwise returns the lesser of the two -arguments. -0.0 is considered to be less than +0.0 for this intrinsic. - -Note that these are the semantics of minimumNumber specified in -IEEE-754-2019 with the usual :ref:`signaling NaN ` exception. +Note that these are the semantics of minimumNumber specified in IEEE 754-2019. It has some differences with '``llvm.minnum.*``': 1)'``llvm.minnum.*``' will return qNaN if either operand is sNaN. @@ -17248,15 +17241,12 @@ type. Semantics: """""""""" +If both operands are NaNs (including sNaN), returns qNaN. If one operand +is NaN (including sNaN) and another operand is a number, return the number. 
+Otherwise returns the greater of the two arguments. -0.0 is considered to +be less than +0.0 for this intrinsic. -If both operands are NaNs (including sNaN), returns a -:ref:`NaN `. If one operand is NaN (including sNaN) and -another operand is a number, return the number. Otherwise returns the -greater of the two arguments. -0.0 is considered to be less than +0.0 -for this intrinsic. - -Note that these are the semantics of maximumNumber specified in -IEEE-754-2019 with the usual :ref:`signaling NaN ` exception. +Note that these are the semantics of maximumNumber specified in IEEE 754-2019. It has some differences with '``llvm.maxnum.*``': 1)'``llvm.maxnum.*``' will return qNaN if either operand is sNaN. diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst index 7f5855966c9b3..692c9861d8caa 100644 --- a/llvm/docs/MLGO.rst +++ b/llvm/docs/MLGO.rst @@ -61,7 +61,6 @@ call, where the parameters and result are bound by name and are described by name, scalar type, and shape tuples. The main types in LLVM are: - - ``MLModelRunner`` - an abstraction for the decision making mechanism - ``TensorSpec`` which describes a tensor. diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst index 957cccc6268e6..51bbfd0a5c88d 100644 --- a/llvm/docs/NVPTXUsage.rst +++ b/llvm/docs/NVPTXUsage.rst @@ -672,7 +672,6 @@ Syntax: .. code-block:: llvm declare void @llvm.nvvm.cp.async.bulk.shared.cta.to.global(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %size, i64 %ch, i1 %flag_ch) - declare void @llvm.nvvm.cp.async.bulk.shared.cta.to.global.bytemask(..., i32 %size, i64 %ch, i1 %flag_ch, i16 %mask) Overview: """"""""" @@ -681,13 +680,10 @@ The '``@llvm.nvvm.cp.async.bulk.shared.cta.to.global``' intrinsic corresponds to the ``cp.async.bulk.global.shared::cta.*`` set of PTX instructions. These instructions initiate an asynchronous copy from shared::cta to global memory. 
The 32-bit operand ``%size`` specifies -the amount of memory to be copied (in bytes) and it must be a multiple -of 16. For the ``.bytemask`` variant, the 16-bit wide mask operand -specifies whether the i-th byte of each 16-byte wide chunk of source -data is copied to the destination. +the amount of memory to be copied and it must be a multiple of 16. -* The ``i1 %flag_ch`` argument to these intrinsics is a boolean - flag indicating support for cache_hint. This flag argument must +* The last argument to these intrinsics is a boolean flag + indicating support for cache_hint. This flag argument must be a compile-time constant. When set, it indicates a valid cache_hint (``i64 %ch``) and generates the ``.L2::cache_hint`` variant of the PTX instruction. diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index cf11d3878a745..0ebe1764c6502 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -119,7 +119,6 @@ on support follow. ``E`` Supported (`See note <#riscv-rve-note>`__) ``H`` Assembly Support ``M`` Supported - ``Q`` Assembly Support ``Sha`` Supported ``Shcounterenw`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Shgatpa`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) @@ -512,9 +511,6 @@ The current vendor extensions supported are: ``XAndesVPackFPH`` LLVM implements `version 5.0.0 of the Andes Vector Packed FP16 Extension specification `__ by Andes Technology. All instructions are prefixed with `nds.` as described in the specification. -``XAndesVDot`` - LLVM implements `version 5.0.0 of the Andes Vector Dot Product Extension specification `__ by Andes Technology. All instructions are prefixed with `nds.` as described in the specification. 
- Experimental C Intrinsics ========================= diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 9c8cc599a8daf..f4bec50cfca46 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -186,10 +186,7 @@ Changes to the RISC-V Backend * Adds assembler support for the Andes `XAndesperf` (Andes Performance extension). * `-mcpu=sifive-p870` was added. * Adds assembler support for the Andes `XAndesvpackfph` (Andes Vector Packed FP16 extension). -* Adds assembler support for the Andes `XAndesvdot` (Andes Vector Dot Product extension). -* Adds assembler support for the standard `Q` (Quad-Precision Floating Point) - extension. - + Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/include/llvm/ADT/APFixedPoint.h b/llvm/include/llvm/ADT/APFixedPoint.h index 89d2a93a06a26..70d7f325702cf 100644 --- a/llvm/include/llvm/ADT/APFixedPoint.h +++ b/llvm/include/llvm/ADT/APFixedPoint.h @@ -249,10 +249,7 @@ class APFixedPoint { } void print(raw_ostream &) const; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + void dump() const; // If LHS > RHS, return 1. If LHS == RHS, return 0. If LHS < RHS, return -1. 
int compare(const APFixedPoint &Other) const; diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index b88cbc56c105c..ed49380cfc05f 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -1483,10 +1483,7 @@ class APFloat : public APFloatBase { } void print(raw_ostream &) const; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + void dump() const; bool getExactInverse(APFloat *inv) const { APFLOAT_DISPATCH_ON_SEMANTICS(getExactInverse(inv)); diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 44260c7eca309..7fbf09b44e6c4 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -1896,10 +1896,8 @@ class [[nodiscard]] APInt { /// FoldingSets. void Profile(FoldingSetNodeID &id) const; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// debug method - LLVM_DUMP_METHOD void dump() const; -#endif + void dump() const; /// Returns whether this instance allocated memory. bool needsCleanup() const { return !isSingleWord(); } diff --git a/llvm/include/llvm/ADT/BitmaskEnum.h b/llvm/include/llvm/ADT/BitmaskEnum.h index 7214f25b0aa10..dcb13bd8ba51a 100644 --- a/llvm/include/llvm/ADT/BitmaskEnum.h +++ b/llvm/include/llvm/ADT/BitmaskEnum.h @@ -92,7 +92,6 @@ using ::llvm::BitmaskEnumDetail::operator^=; \ using ::llvm::BitmaskEnumDetail::operator<<=; \ using ::llvm::BitmaskEnumDetail::operator>>=; \ - using ::llvm::BitmaskEnumDetail::operator!; \ /* Force a semicolon at the end of this macro. */ \ using ::llvm::BitmaskEnumDetail::any @@ -142,11 +141,6 @@ constexpr unsigned bitWidth(uint64_t Value) { return Value ? 
1 + bitWidth(Value >> 1) : 0; } -template ::value>> -constexpr bool operator!(E Val) { - return Val == static_cast(0); -} - template ::value>> constexpr bool any(E Val) { return Val != static_cast(0); diff --git a/llvm/include/llvm/ADT/DynamicAPInt.h b/llvm/include/llvm/ADT/DynamicAPInt.h index bb65a08a968d9..ff958d48e7731 100644 --- a/llvm/include/llvm/ADT/DynamicAPInt.h +++ b/llvm/include/llvm/ADT/DynamicAPInt.h @@ -216,9 +216,7 @@ class DynamicAPInt { void static_assert_layout(); // NOLINT raw_ostream &print(raw_ostream &OS) const; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; -#endif }; inline raw_ostream &operator<<(raw_ostream &OS, const DynamicAPInt &X) { diff --git a/llvm/include/llvm/ADT/SlowDynamicAPInt.h b/llvm/include/llvm/ADT/SlowDynamicAPInt.h index c9aef96b9e1c3..ec1021892cf4d 100644 --- a/llvm/include/llvm/ADT/SlowDynamicAPInt.h +++ b/llvm/include/llvm/ADT/SlowDynamicAPInt.h @@ -79,10 +79,7 @@ class SlowDynamicAPInt { unsigned getBitWidth() const { return Val.getBitWidth(); } void print(raw_ostream &OS) const; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; -#endif }; inline raw_ostream &operator<<(raw_ostream &OS, const SlowDynamicAPInt &X) { diff --git a/llvm/include/llvm/ADT/TrieRawHashMap.h b/llvm/include/llvm/ADT/TrieRawHashMap.h index 1382eac1c768f..e312967edeb58 100644 --- a/llvm/include/llvm/ADT/TrieRawHashMap.h +++ b/llvm/include/llvm/ADT/TrieRawHashMap.h @@ -90,10 +90,7 @@ class ThreadSafeTrieRawHashMapBase { static void *operator new(size_t Size) { return ::operator new(Size); } void operator delete(void *Ptr) { ::operator delete(Ptr); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; -#endif - void print(raw_ostream &OS) const; protected: @@ -217,10 +214,7 @@ class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase { using ThreadSafeTrieRawHashMapBase::operator delete; using HashType = HashT; -#if 
!defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) using ThreadSafeTrieRawHashMapBase::dump; -#endif - using ThreadSafeTrieRawHashMapBase::print; private: diff --git a/llvm/include/llvm/ADT/Twine.h b/llvm/include/llvm/ADT/Twine.h index d9e553a8a8c77..1f1fd1967efbc 100644 --- a/llvm/include/llvm/ADT/Twine.h +++ b/llvm/include/llvm/ADT/Twine.h @@ -507,16 +507,14 @@ namespace llvm { /// stream \p OS. void print(raw_ostream &OS) const; + /// Dump the concatenated string represented by this twine to stderr. + void dump() const; + /// Write the representation of this twine to the stream \p OS. void printRepr(raw_ostream &OS) const; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Dump the concatenated string represented by this twine to stderr. - LLVM_DUMP_METHOD void dump() const; - /// Dump the representation of this twine to stderr. - LLVM_DUMP_METHOD void dumpRepr() const; -#endif + void dumpRepr() const; /// @} }; diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 16f54c394788d..d23b81854c9ea 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -1011,24 +1011,19 @@ struct ExternalAAWrapperPass : ImmutablePass { ExternalAAWrapperPass(); - explicit ExternalAAWrapperPass(CallbackT CB, bool RunEarly = false); + explicit ExternalAAWrapperPass(CallbackT CB); - /// Flag indicating whether this external AA should run before Basic AA. + /// Returns whether this external AA should run before Basic AA. /// - /// This flag is for LegacyPassManager only. To run an external AA early - /// with the NewPassManager, override the registerEarlyDefaultAliasAnalyses - /// method on the target machine. - /// - /// By default, external AA passes are run after Basic AA. If this flag is - /// set to true, the external AA will be run before Basic AA during alias - /// analysis. + /// By default, external AA passes are run after Basic AA. 
If this returns + /// true, the external AA will be run before Basic AA during alias analysis. /// /// For some targets, we prefer to run the external AA early to improve /// compile time as it has more target-specific information. This is /// particularly useful when the external AA can provide more precise results /// than Basic AA so that Basic AA does not need to spend time recomputing /// them. - bool RunEarly = false; + virtual bool runEarly() { return false; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index fea2ede8b5ab4..f715e0ec8dbb4 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -853,10 +853,11 @@ getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, /// is a simple API that does not depend on the analysis pass. /// \param StrictCheck Ensure that the calculated distance matches the /// type-based one after all the bitcasts removal in the provided pointers. -std::optional -getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, - const DataLayout &DL, ScalarEvolution &SE, - bool StrictCheck = false, bool CheckType = true); +std::optional getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, + Value *PtrB, const DataLayout &DL, + ScalarEvolution &SE, + bool StrictCheck = false, + bool CheckType = true); /// Attempt to sort the pointers in \p VL and return the sorted indices /// in \p SortedIndices, if reordering is required. 
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 1aed98e8f50db..3f639138d8b75 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -680,9 +680,6 @@ class TargetTransformInfo { /// If the value is true the peeling cost model can decide to peel only /// some iterations and in this case it will set this to false. bool PeelProfiledIterations; - - /// Peel off the last PeelCount loop iterations. - bool PeelLast; }; /// Get target-customized preferences for the generic loop peeling diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index 81d2c54b6e07c..1645018aebedb 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -34,7 +34,7 @@ SHADER_FEATURE_FLAG(14, 19, WaveOps, "Wave level operations") SHADER_FEATURE_FLAG(15, 20, Int64Ops, "64-Bit integer") SHADER_FEATURE_FLAG(16, 21, ViewID, "View Instancing") SHADER_FEATURE_FLAG(17, 22, Barycentrics, "Barycentrics") -SHADER_FEATURE_FLAG(18, -1, NativeLowPrecision, "Native low-precision data types") +SHADER_FEATURE_FLAG(18, -1, NativeLowPrecision, "Use native low precision") SHADER_FEATURE_FLAG(19, 24, ShadingRate, "Shading Rate") SHADER_FEATURE_FLAG(20, 25, Raytracing_Tier_1_1, "Raytracing tier 1.1 features") SHADER_FEATURE_FLAG(21, 26, SamplerFeedback, "Sampler feedback") @@ -115,9 +115,9 @@ DXIL_MODULE_FLAG( 0, DisableOptimizations, "Disable shader optimizations") DXIL_MODULE_FLAG( 1, DisableMathRefactoring, "Disable math refactoring") DXIL_MODULE_FLAG( 3, ForceEarlyDepthStencil, "Force early depth-stencil test") DXIL_MODULE_FLAG( 4, EnableRawAndStructuredBuffers, "Raw and structured buffers") -DXIL_MODULE_FLAG( 5, LowPrecisionPresent, "Low-precision data types present") +DXIL_MODULE_FLAG( 5, LowPrecisionPresent, "Low-precision data 
types") DXIL_MODULE_FLAG( 8, AllResourcesBound, "All resources bound for the duration of shader execution") -DXIL_MODULE_FLAG(23, NativeLowPrecisionMode, "Enable native low-precision data types") +DXIL_MODULE_FLAG(23, UseNativeLowPrecision, "Use native low precision") DXIL_MODULE_FLAG(33, ResMayNotAlias, "Any UAV may not alias any other UAV") #undef DXIL_MODULE_FLAG diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 9f66402e4c820..80ef32aff62ae 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -67,15 +67,6 @@ enum NodeType { /// poisoned the assertion will not be true for that value. AssertAlign, - /// AssertNoFPClass - These nodes record if a register contains a float - /// value that is known to be not some type. - /// This node takes two operands. The first is the node that is known - /// never to be some float types; the second is a constant value with - /// the value of FPClassTest (casted to uint32_t). - /// NOTE: In case of the source value (or any vector element value) is - /// poisoned the assertion will not be true for that value. - AssertNoFPClass, - /// Various leaf nodes. BasicBlock, VALUETYPE, @@ -1533,15 +1524,6 @@ enum NodeType { // Operands: Mask VECTOR_FIND_LAST_ACTIVE, - // GET_ACTIVE_LANE_MASK - this corrosponds to the llvm.get.active.lane.mask - // intrinsic. It creates a mask representing active and inactive vector - // lanes, active while Base + index < Trip Count. As with the intrinsic, - // the operands Base and Trip Count have the same scalar integer type and - // the internal addition of Base + index cannot overflow. However, the ISD - // node supports result types which are wider than i1, where the high - // bits conform to getBooleanContents similar to the SETCC operator. 
- GET_ACTIVE_LANE_MASK, - // llvm.clear_cache intrinsic // Operands: Input Chain, Start Addres, End Address // Outputs: Output Chain diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 428fc35f8a400..d214ab9306c2f 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -14,9 +14,9 @@ #ifndef LLVM_CODEGEN_PASSES_H #define LLVM_CODEGEN_PASSES_H -#include "llvm/CodeGen/RegAllocCommon.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Discriminator.h" +#include "llvm/CodeGen/RegAllocCommon.h" #include #include @@ -36,587 +36,587 @@ namespace vfs { class FileSystem; } // namespace vfs -} // namespace llvm +} // End llvm namespace // List of target independent CodeGen pass IDs. namespace llvm { -/// AtomicExpandPass - At IR level this pass replace atomic instructions with -/// __atomic_* library calls, or target specific instruction which implement the -/// same semantics in a way which better fits the target backend. -FunctionPass *createAtomicExpandLegacyPass(); - -/// createUnreachableBlockEliminationPass - The LLVM code generator does not -/// work well with unreachable basic blocks (what live ranges make sense for a -/// block that cannot be reached?). As such, a code generator should either -/// not instruction select unreachable blocks, or run this pass as its -/// last LLVM modifying pass to clean up blocks that are not reachable from -/// the entry block. -FunctionPass *createUnreachableBlockEliminationPass(); - -/// createGCEmptyBasicblocksPass - Empty basic blocks (basic blocks without -/// real code) appear as the result of optimization passes removing -/// instructions. These blocks confuscate profile analysis (e.g., basic block -/// sections) since they will share the address of their fallthrough blocks. -/// This pass garbage-collects such basic blocks. 
-MachineFunctionPass *createGCEmptyBasicBlocksPass(); - -/// createBasicBlockSections Pass - This pass assigns sections to machine -/// basic blocks and is enabled with -fbasic-block-sections. -MachineFunctionPass *createBasicBlockSectionsPass(); - -MachineFunctionPass *createBasicBlockPathCloningPass(); + /// AtomicExpandPass - At IR level this pass replace atomic instructions with + /// __atomic_* library calls, or target specific instruction which implement the + /// same semantics in a way which better fits the target backend. + FunctionPass *createAtomicExpandLegacyPass(); + + /// createUnreachableBlockEliminationPass - The LLVM code generator does not + /// work well with unreachable basic blocks (what live ranges make sense for a + /// block that cannot be reached?). As such, a code generator should either + /// not instruction select unreachable blocks, or run this pass as its + /// last LLVM modifying pass to clean up blocks that are not reachable from + /// the entry block. + FunctionPass *createUnreachableBlockEliminationPass(); + + /// createGCEmptyBasicblocksPass - Empty basic blocks (basic blocks without + /// real code) appear as the result of optimization passes removing + /// instructions. These blocks confuscate profile analysis (e.g., basic block + /// sections) since they will share the address of their fallthrough blocks. + /// This pass garbage-collects such basic blocks. + MachineFunctionPass *createGCEmptyBasicBlocksPass(); + + /// createBasicBlockSections Pass - This pass assigns sections to machine + /// basic blocks and is enabled with -fbasic-block-sections. + MachineFunctionPass *createBasicBlockSectionsPass(); + + MachineFunctionPass *createBasicBlockPathCloningPass(); -/// createMachineFunctionSplitterPass - This pass splits machine functions -/// using profile information. 
-MachineFunctionPass *createMachineFunctionSplitterPass(); + /// createMachineFunctionSplitterPass - This pass splits machine functions + /// using profile information. + MachineFunctionPass *createMachineFunctionSplitterPass(); -/// createStaticDataSplitterPass - This is a machine-function pass that -/// categorizes static data hotness using profile information. -MachineFunctionPass *createStaticDataSplitterPass(); + /// createStaticDataSplitterPass - This is a machine-function pass that + /// categorizes static data hotness using profile information. + MachineFunctionPass *createStaticDataSplitterPass(); -/// createStaticDataAnnotatorPASS - This is a module pass that reads from -/// StaticDataProfileInfoWrapperPass and annotates the section prefix of -/// global variables. -ModulePass *createStaticDataAnnotatorPass(); - -/// MachineFunctionPrinter pass - This pass prints out the machine function to -/// the given stream as a debugging tool. -MachineFunctionPass * -createMachineFunctionPrinterPass(raw_ostream &OS, - const std::string &Banner = ""); + /// createStaticDataAnnotatorPASS - This is a module pass that reads from + /// StaticDataProfileInfoWrapperPass and annotates the section prefix of + /// global variables. + ModulePass *createStaticDataAnnotatorPass(); + + /// MachineFunctionPrinter pass - This pass prints out the machine function to + /// the given stream as a debugging tool. + MachineFunctionPass * + createMachineFunctionPrinterPass(raw_ostream &OS, + const std::string &Banner =""); -/// StackFramePrinter pass - This pass prints out the machine function's -/// stack frame to the given stream as a debugging tool. -MachineFunctionPass *createStackFrameLayoutAnalysisPass(); + /// StackFramePrinter pass - This pass prints out the machine function's + /// stack frame to the given stream as a debugging tool. 
+ MachineFunctionPass *createStackFrameLayoutAnalysisPass(); -/// MIRPrinting pass - this pass prints out the LLVM IR into the given stream -/// using the MIR serialization format. -MachineFunctionPass *createPrintMIRPass(raw_ostream &OS); + /// MIRPrinting pass - this pass prints out the LLVM IR into the given stream + /// using the MIR serialization format. + MachineFunctionPass *createPrintMIRPass(raw_ostream &OS); -/// This pass resets a MachineFunction when it has the FailedISel property -/// as if it was just created. -/// If EmitFallbackDiag is true, the pass will emit a -/// DiagnosticInfoISelFallback for every MachineFunction it resets. -/// If AbortOnFailedISel is true, abort compilation instead of resetting. -MachineFunctionPass *createResetMachineFunctionPass(bool EmitFallbackDiag, - bool AbortOnFailedISel); + /// This pass resets a MachineFunction when it has the FailedISel property + /// as if it was just created. + /// If EmitFallbackDiag is true, the pass will emit a + /// DiagnosticInfoISelFallback for every MachineFunction it resets. + /// If AbortOnFailedISel is true, abort compilation instead of resetting. + MachineFunctionPass *createResetMachineFunctionPass(bool EmitFallbackDiag, + bool AbortOnFailedISel); -/// createCodeGenPrepareLegacyPass - Transform the code to expose more pattern -/// matching during instruction selection. -FunctionPass *createCodeGenPrepareLegacyPass(); + /// createCodeGenPrepareLegacyPass - Transform the code to expose more pattern + /// matching during instruction selection. 
+ FunctionPass *createCodeGenPrepareLegacyPass(); -/// This pass implements generation of target-specific intrinsics to support -/// handling of complex number arithmetic -FunctionPass *createComplexDeinterleavingPass(const TargetMachine *TM); + /// This pass implements generation of target-specific intrinsics to support + /// handling of complex number arithmetic + FunctionPass *createComplexDeinterleavingPass(const TargetMachine *TM); -/// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg -/// load-linked/store-conditional loops. -extern char &AtomicExpandID; + /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg + /// load-linked/store-conditional loops. + extern char &AtomicExpandID; -/// MachineLoopInfo - This pass is a loop analysis pass. -extern char &MachineLoopInfoID; + /// MachineLoopInfo - This pass is a loop analysis pass. + extern char &MachineLoopInfoID; -/// MachineDominators - This pass is a machine dominators analysis pass. -extern char &MachineDominatorsID; - -/// MachineDominanaceFrontier - This pass is a machine dominators analysis. -extern char &MachineDominanceFrontierID; + /// MachineDominators - This pass is a machine dominators analysis pass. + extern char &MachineDominatorsID; + + /// MachineDominanaceFrontier - This pass is a machine dominators analysis. + extern char &MachineDominanceFrontierID; -/// MachineRegionInfo - This pass computes SESE regions for machine functions. -extern char &MachineRegionInfoPassID; - -/// EdgeBundles analysis - Bundle machine CFG edges. -extern char &EdgeBundlesWrapperLegacyID; - -/// LiveVariables pass - This pass computes the set of blocks in which each -/// variable is life and sets machine operand kill flags. -extern char &LiveVariablesID; - -/// PHIElimination - This pass eliminates machine instruction PHI nodes -/// by inserting copy instructions. This destroys SSA information, but is the -/// desired input for some register allocators. 
This pass is "required" by -/// these register allocator like this: AU.addRequiredID(PHIEliminationID); -extern char &PHIEliminationID; - -/// LiveIntervals - This analysis keeps track of the live ranges of virtual -/// and physical registers. -extern char &LiveIntervalsID; - -/// LiveStacks pass. An analysis keeping track of the liveness of stack slots. -extern char &LiveStacksID; - -/// TwoAddressInstruction - This pass reduces two-address instructions to -/// use two operands. This destroys SSA information but it is desired by -/// register allocators. -extern char &TwoAddressInstructionPassID; - -/// ProcessImpicitDefs pass - This pass removes IMPLICIT_DEFs. -extern char &ProcessImplicitDefsID; - -/// RegisterCoalescer - This pass merges live ranges to eliminate copies. -extern char &RegisterCoalescerID; - -/// MachineScheduler - This pass schedules machine instructions. -extern char &MachineSchedulerID; - -/// PostMachineScheduler - This pass schedules machine instructions postRA. -extern char &PostMachineSchedulerID; - -/// SpillPlacement analysis. Suggest optimal placement of spill code between -/// basic blocks. -extern char &SpillPlacementID; - -/// ShrinkWrap pass. Look for the best place to insert save and restore -// instruction and update the MachineFunctionInfo with that information. -extern char &ShrinkWrapID; - -/// LiveRangeShrink pass. Move instruction close to its definition to shrink -/// the definition's live range. -extern char &LiveRangeShrinkID; - -/// Greedy register allocator. -extern char &RAGreedyLegacyID; - -/// Basic register allocator. -extern char &RABasicID; - -/// VirtRegRewriter pass. Rewrite virtual registers to physical registers as -/// assigned in VirtRegMap. -extern char &VirtRegRewriterID; -FunctionPass *createVirtRegRewriter(bool ClearVirtRegs = true); + /// MachineRegionInfo - This pass computes SESE regions for machine functions. 
+ extern char &MachineRegionInfoPassID; + + /// EdgeBundles analysis - Bundle machine CFG edges. + extern char &EdgeBundlesWrapperLegacyID; + + /// LiveVariables pass - This pass computes the set of blocks in which each + /// variable is life and sets machine operand kill flags. + extern char &LiveVariablesID; + + /// PHIElimination - This pass eliminates machine instruction PHI nodes + /// by inserting copy instructions. This destroys SSA information, but is the + /// desired input for some register allocators. This pass is "required" by + /// these register allocator like this: AU.addRequiredID(PHIEliminationID); + extern char &PHIEliminationID; + + /// LiveIntervals - This analysis keeps track of the live ranges of virtual + /// and physical registers. + extern char &LiveIntervalsID; + + /// LiveStacks pass. An analysis keeping track of the liveness of stack slots. + extern char &LiveStacksID; + + /// TwoAddressInstruction - This pass reduces two-address instructions to + /// use two operands. This destroys SSA information but it is desired by + /// register allocators. + extern char &TwoAddressInstructionPassID; + + /// ProcessImpicitDefs pass - This pass removes IMPLICIT_DEFs. + extern char &ProcessImplicitDefsID; + + /// RegisterCoalescer - This pass merges live ranges to eliminate copies. + extern char &RegisterCoalescerID; + + /// MachineScheduler - This pass schedules machine instructions. + extern char &MachineSchedulerID; + + /// PostMachineScheduler - This pass schedules machine instructions postRA. + extern char &PostMachineSchedulerID; + + /// SpillPlacement analysis. Suggest optimal placement of spill code between + /// basic blocks. + extern char &SpillPlacementID; + + /// ShrinkWrap pass. Look for the best place to insert save and restore + // instruction and update the MachineFunctionInfo with that information. + extern char &ShrinkWrapID; + + /// LiveRangeShrink pass. 
Move instruction close to its definition to shrink + /// the definition's live range. + extern char &LiveRangeShrinkID; + + /// Greedy register allocator. + extern char &RAGreedyLegacyID; + + /// Basic register allocator. + extern char &RABasicID; + + /// VirtRegRewriter pass. Rewrite virtual registers to physical registers as + /// assigned in VirtRegMap. + extern char &VirtRegRewriterID; + FunctionPass *createVirtRegRewriter(bool ClearVirtRegs = true); -/// UnreachableMachineBlockElimination - This pass removes unreachable -/// machine basic blocks. -extern char &UnreachableMachineBlockElimID; + /// UnreachableMachineBlockElimination - This pass removes unreachable + /// machine basic blocks. + extern char &UnreachableMachineBlockElimID; -/// DeadMachineInstructionElim - This pass removes dead machine instructions. -extern char &DeadMachineInstructionElimID; + /// DeadMachineInstructionElim - This pass removes dead machine instructions. + extern char &DeadMachineInstructionElimID; -/// This pass adds dead/undef flags after analyzing subregister lanes. -extern char &DetectDeadLanesID; + /// This pass adds dead/undef flags after analyzing subregister lanes. + extern char &DetectDeadLanesID; -/// This pass perform post-ra machine sink for COPY instructions. -extern char &PostRAMachineSinkingID; + /// This pass perform post-ra machine sink for COPY instructions. + extern char &PostRAMachineSinkingID; -/// This pass adds flow sensitive discriminators. -extern char &MIRAddFSDiscriminatorsID; + /// This pass adds flow sensitive discriminators. + extern char &MIRAddFSDiscriminatorsID; -/// This pass reads flow sensitive profile. -extern char &MIRProfileLoaderPassID; + /// This pass reads flow sensitive profile. + extern char &MIRProfileLoaderPassID; -// This pass gives undef values a Pseudo Instruction definition for -// Instructions to ensure early-clobber is followed when using the greedy -// register allocator. 
-extern char &InitUndefID; + // This pass gives undef values a Pseudo Instruction definition for + // Instructions to ensure early-clobber is followed when using the greedy + // register allocator. + extern char &InitUndefID; -/// FastRegisterAllocation Pass - This pass register allocates as fast as -/// possible. It is best suited for debug code where live ranges are short. -/// -FunctionPass *createFastRegisterAllocator(); -FunctionPass *createFastRegisterAllocator(RegAllocFilterFunc F, - bool ClearVirtRegs); + /// FastRegisterAllocation Pass - This pass register allocates as fast as + /// possible. It is best suited for debug code where live ranges are short. + /// + FunctionPass *createFastRegisterAllocator(); + FunctionPass *createFastRegisterAllocator(RegAllocFilterFunc F, + bool ClearVirtRegs); -/// BasicRegisterAllocation Pass - This pass implements a degenerate global -/// register allocator using the basic regalloc framework. -/// -FunctionPass *createBasicRegisterAllocator(); -FunctionPass *createBasicRegisterAllocator(RegAllocFilterFunc F); + /// BasicRegisterAllocation Pass - This pass implements a degenerate global + /// register allocator using the basic regalloc framework. + /// + FunctionPass *createBasicRegisterAllocator(); + FunctionPass *createBasicRegisterAllocator(RegAllocFilterFunc F); -/// Greedy register allocation pass - This pass implements a global register -/// allocator for optimized builds. -/// -FunctionPass *createGreedyRegisterAllocator(); -FunctionPass *createGreedyRegisterAllocator(RegAllocFilterFunc F); + /// Greedy register allocation pass - This pass implements a global register + /// allocator for optimized builds. + /// + FunctionPass *createGreedyRegisterAllocator(); + FunctionPass *createGreedyRegisterAllocator(RegAllocFilterFunc F); -/// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean -/// Quadratic Prograaming (PBQP) based register allocator. 
-/// -FunctionPass *createDefaultPBQPRegisterAllocator(); - -/// PrologEpilogCodeInserter - This pass inserts prolog and epilog code, -/// and eliminates abstract frame references. -extern char &PrologEpilogCodeInserterID; -MachineFunctionPass *createPrologEpilogInserterPass(); + /// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean + /// Quadratic Prograaming (PBQP) based register allocator. + /// + FunctionPass *createDefaultPBQPRegisterAllocator(); + + /// PrologEpilogCodeInserter - This pass inserts prolog and epilog code, + /// and eliminates abstract frame references. + extern char &PrologEpilogCodeInserterID; + MachineFunctionPass *createPrologEpilogInserterPass(); -/// ExpandPostRAPseudos - This pass expands pseudo instructions after -/// register allocation. -extern char &ExpandPostRAPseudosID; + /// ExpandPostRAPseudos - This pass expands pseudo instructions after + /// register allocation. + extern char &ExpandPostRAPseudosID; -/// PostRAHazardRecognizer - This pass runs the post-ra hazard -/// recognizer. -extern char &PostRAHazardRecognizerID; - -/// PostRAScheduler - This pass performs post register allocation -/// scheduling. -extern char &PostRASchedulerID; + /// PostRAHazardRecognizer - This pass runs the post-ra hazard + /// recognizer. + extern char &PostRAHazardRecognizerID; + + /// PostRAScheduler - This pass performs post register allocation + /// scheduling. + extern char &PostRASchedulerID; -/// BranchFolding - This pass performs machine code CFG based -/// optimizations to delete branches to branches, eliminate branches to -/// successor blocks (creating fall throughs), and eliminating branches over -/// branches. -extern char &BranchFolderPassID; + /// BranchFolding - This pass performs machine code CFG based + /// optimizations to delete branches to branches, eliminate branches to + /// successor blocks (creating fall throughs), and eliminating branches over + /// branches. 
+ extern char &BranchFolderPassID; -/// BranchRelaxation - This pass replaces branches that need to jump further -/// than is supported by a branch instruction. -extern char &BranchRelaxationPassID; - -/// MachineFunctionPrinterPass - This pass prints out MachineInstr's. -extern char &MachineFunctionPrinterPassID; - -/// MIRPrintingPass - this pass prints out the LLVM IR using the MIR -/// serialization format. -extern char &MIRPrintingPassID; - -/// TailDuplicate - Duplicate blocks with unconditional branches -/// into tails of their predecessors. -extern char &TailDuplicateLegacyID; - -/// Duplicate blocks with unconditional branches into tails of their -/// predecessors. Variant that works before register allocation. -extern char &EarlyTailDuplicateLegacyID; - -/// MachineTraceMetrics - This pass computes critical path and CPU resource -/// usage in an ensemble of traces. -extern char &MachineTraceMetricsID; - -/// EarlyIfConverter - This pass performs if-conversion on SSA form by -/// inserting cmov instructions. -extern char &EarlyIfConverterLegacyID; + /// BranchRelaxation - This pass replaces branches that need to jump further + /// than is supported by a branch instruction. + extern char &BranchRelaxationPassID; + + /// MachineFunctionPrinterPass - This pass prints out MachineInstr's. + extern char &MachineFunctionPrinterPassID; + + /// MIRPrintingPass - this pass prints out the LLVM IR using the MIR + /// serialization format. + extern char &MIRPrintingPassID; + + /// TailDuplicate - Duplicate blocks with unconditional branches + /// into tails of their predecessors. + extern char &TailDuplicateLegacyID; + + /// Duplicate blocks with unconditional branches into tails of their + /// predecessors. Variant that works before register allocation. + extern char &EarlyTailDuplicateLegacyID; + + /// MachineTraceMetrics - This pass computes critical path and CPU resource + /// usage in an ensemble of traces. 
+ extern char &MachineTraceMetricsID; + + /// EarlyIfConverter - This pass performs if-conversion on SSA form by + /// inserting cmov instructions. + extern char &EarlyIfConverterLegacyID; -/// EarlyIfPredicator - This pass performs if-conversion on SSA form by -/// predicating if/else block and insert select at the join point. -extern char &EarlyIfPredicatorID; + /// EarlyIfPredicator - This pass performs if-conversion on SSA form by + /// predicating if/else block and insert select at the join point. + extern char &EarlyIfPredicatorID; -/// This pass performs instruction combining using trace metrics to estimate -/// critical-path and resource depth. -extern char &MachineCombinerID; + /// This pass performs instruction combining using trace metrics to estimate + /// critical-path and resource depth. + extern char &MachineCombinerID; -/// StackSlotColoring - This pass performs stack coloring and merging. -/// It merges disjoint allocas to reduce the stack size. -extern char &StackColoringLegacyID; + /// StackSlotColoring - This pass performs stack coloring and merging. + /// It merges disjoint allocas to reduce the stack size. + extern char &StackColoringLegacyID; -/// StackFramePrinter - This pass prints the stack frame layout and variable -/// mappings. -extern char &StackFrameLayoutAnalysisPassID; - -/// IfConverter - This pass performs machine code if conversion. -extern char &IfConverterID; - -FunctionPass * -createIfConverter(std::function Ftor); - -/// MachineBlockPlacement - This pass places basic blocks based on branch -/// probabilities. -extern char &MachineBlockPlacementID; - -/// MachineBlockPlacementStats - This pass collects statistics about the -/// basic block placement using branch probabilities and block frequency -/// information. -extern char &MachineBlockPlacementStatsID; - -/// GCLowering Pass - Used by gc.root to perform its default lowering -/// operations. 
-FunctionPass *createGCLoweringPass(); - -/// GCLowering Pass - Used by gc.root to perform its default lowering -/// operations. -extern char &GCLoweringID; + /// StackFramePrinter - This pass prints the stack frame layout and variable + /// mappings. + extern char &StackFrameLayoutAnalysisPassID; + + /// IfConverter - This pass performs machine code if conversion. + extern char &IfConverterID; + + FunctionPass *createIfConverter( + std::function Ftor); + + /// MachineBlockPlacement - This pass places basic blocks based on branch + /// probabilities. + extern char &MachineBlockPlacementID; + + /// MachineBlockPlacementStats - This pass collects statistics about the + /// basic block placement using branch probabilities and block frequency + /// information. + extern char &MachineBlockPlacementStatsID; + + /// GCLowering Pass - Used by gc.root to perform its default lowering + /// operations. + FunctionPass *createGCLoweringPass(); + + /// GCLowering Pass - Used by gc.root to perform its default lowering + /// operations. + extern char &GCLoweringID; -/// ShadowStackGCLowering - Implements the custom lowering mechanism -/// used by the shadow stack GC. Only runs on functions which opt in to -/// the shadow stack collector. -FunctionPass *createShadowStackGCLoweringPass(); + /// ShadowStackGCLowering - Implements the custom lowering mechanism + /// used by the shadow stack GC. Only runs on functions which opt in to + /// the shadow stack collector. + FunctionPass *createShadowStackGCLoweringPass(); -/// ShadowStackGCLowering - Implements the custom lowering mechanism -/// used by the shadow stack GC. -extern char &ShadowStackGCLoweringID; + /// ShadowStackGCLowering - Implements the custom lowering mechanism + /// used by the shadow stack GC. + extern char &ShadowStackGCLoweringID; -/// GCMachineCodeAnalysis - Target-independent pass to mark safe points -/// in machine code. 
Must be added very late during code generation, just -/// prior to output, and importantly after all CFG transformations (such as -/// branch folding). -extern char &GCMachineCodeAnalysisID; + /// GCMachineCodeAnalysis - Target-independent pass to mark safe points + /// in machine code. Must be added very late during code generation, just + /// prior to output, and importantly after all CFG transformations (such as + /// branch folding). + extern char &GCMachineCodeAnalysisID; -/// MachineCSE - This pass performs global CSE on machine instructions. -extern char &MachineCSELegacyID; + /// MachineCSE - This pass performs global CSE on machine instructions. + extern char &MachineCSELegacyID; -/// MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs -/// according to the semantics of the instruction as well as hoists -/// code. -extern char &MIRCanonicalizerID; + /// MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs + /// according to the semantics of the instruction as well as hoists + /// code. + extern char &MIRCanonicalizerID; -/// ImplicitNullChecks - This pass folds null pointer checks into nearby -/// memory operations. -extern char &ImplicitNullChecksID; + /// ImplicitNullChecks - This pass folds null pointer checks into nearby + /// memory operations. + extern char &ImplicitNullChecksID; -/// This pass performs loop invariant code motion on machine instructions. -extern char &MachineLICMID; + /// This pass performs loop invariant code motion on machine instructions. + extern char &MachineLICMID; -/// This pass performs loop invariant code motion on machine instructions. -/// This variant works before register allocation. \see MachineLICMID. -extern char &EarlyMachineLICMID; + /// This pass performs loop invariant code motion on machine instructions. + /// This variant works before register allocation. \see MachineLICMID. + extern char &EarlyMachineLICMID; -/// MachineSinking - This pass performs sinking on machine instructions. 
-extern char &MachineSinkingLegacyID; + /// MachineSinking - This pass performs sinking on machine instructions. + extern char &MachineSinkingLegacyID; -/// MachineCopyPropagation - This pass performs copy propagation on -/// machine instructions. -extern char &MachineCopyPropagationID; + /// MachineCopyPropagation - This pass performs copy propagation on + /// machine instructions. + extern char &MachineCopyPropagationID; -MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr); + MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr); -/// MachineLateInstrsCleanup - This pass removes redundant identical -/// instructions after register allocation and rematerialization. -extern char &MachineLateInstrsCleanupID; + /// MachineLateInstrsCleanup - This pass removes redundant identical + /// instructions after register allocation and rematerialization. + extern char &MachineLateInstrsCleanupID; -/// PeepholeOptimizer - This pass performs peephole optimizations - -/// like extension and comparison eliminations. -extern char &PeepholeOptimizerLegacyID; + /// PeepholeOptimizer - This pass performs peephole optimizations - + /// like extension and comparison eliminations. + extern char &PeepholeOptimizerLegacyID; -/// OptimizePHIs - This pass optimizes machine instruction PHIs -/// to take advantage of opportunities created during DAG legalization. -extern char &OptimizePHIsLegacyID; + /// OptimizePHIs - This pass optimizes machine instruction PHIs + /// to take advantage of opportunities created during DAG legalization. + extern char &OptimizePHIsLegacyID; -/// StackSlotColoring - This pass performs stack slot coloring. -extern char &StackSlotColoringID; + /// StackSlotColoring - This pass performs stack slot coloring. + extern char &StackSlotColoringID; -/// This pass lays out funclets contiguously. -extern char &FuncletLayoutID; + /// This pass lays out funclets contiguously. 
+ extern char &FuncletLayoutID; -/// This pass inserts the XRay instrumentation sleds if they are supported by -/// the target platform. -extern char &XRayInstrumentationID; + /// This pass inserts the XRay instrumentation sleds if they are supported by + /// the target platform. + extern char &XRayInstrumentationID; -/// This pass inserts FEntry calls -extern char &FEntryInserterID; - -/// This pass implements the "patchable-function" attribute. -extern char &PatchableFunctionID; - -/// createStackProtectorPass - This pass adds stack protectors to functions. -/// -FunctionPass *createStackProtectorPass(); - -/// createMachineVerifierPass - This pass verifies cenerated machine code -/// instructions for correctness. -/// -FunctionPass *createMachineVerifierPass(const std::string &Banner); - -/// createDwarfEHPass - This pass mulches exception handling code into a form -/// adapted to code generation. Required if using dwarf exception handling. -FunctionPass *createDwarfEHPass(CodeGenOptLevel OptLevel); - -/// createWinEHPass - Prepares personality functions used by MSVC on Windows, -/// in addition to the Itanium LSDA based personalities. -FunctionPass *createWinEHPass(bool DemoteCatchSwitchPHIOnly = false); - -/// createSjLjEHPreparePass - This pass adapts exception handling code to use -/// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. -/// -FunctionPass *createSjLjEHPreparePass(const TargetMachine *TM); - -/// createWasmEHPass - This pass adapts exception handling code to use -/// WebAssembly's exception handling scheme. -FunctionPass *createWasmEHPass(); - -/// LocalStackSlotAllocation - This pass assigns local frame indices to stack -/// slots relative to one another and allocates base registers to access them -/// when it is estimated by the target to be out of range of normal frame -/// pointer or stack pointer index addressing. 
-extern char &LocalStackSlotAllocationID; + /// This pass inserts FEntry calls + extern char &FEntryInserterID; + + /// This pass implements the "patchable-function" attribute. + extern char &PatchableFunctionID; + + /// createStackProtectorPass - This pass adds stack protectors to functions. + /// + FunctionPass *createStackProtectorPass(); + + /// createMachineVerifierPass - This pass verifies cenerated machine code + /// instructions for correctness. + /// + FunctionPass *createMachineVerifierPass(const std::string& Banner); + + /// createDwarfEHPass - This pass mulches exception handling code into a form + /// adapted to code generation. Required if using dwarf exception handling. + FunctionPass *createDwarfEHPass(CodeGenOptLevel OptLevel); + + /// createWinEHPass - Prepares personality functions used by MSVC on Windows, + /// in addition to the Itanium LSDA based personalities. + FunctionPass *createWinEHPass(bool DemoteCatchSwitchPHIOnly = false); + + /// createSjLjEHPreparePass - This pass adapts exception handling code to use + /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. + /// + FunctionPass *createSjLjEHPreparePass(const TargetMachine *TM); + + /// createWasmEHPass - This pass adapts exception handling code to use + /// WebAssembly's exception handling scheme. + FunctionPass *createWasmEHPass(); + + /// LocalStackSlotAllocation - This pass assigns local frame indices to stack + /// slots relative to one another and allocates base registers to access them + /// when it is estimated by the target to be out of range of normal frame + /// pointer or stack pointer index addressing. + extern char &LocalStackSlotAllocationID; -/// This pass expands pseudo-instructions, reserves registers and adjusts -/// machine frame information. -extern char &FinalizeISelID; + /// This pass expands pseudo-instructions, reserves registers and adjusts + /// machine frame information. 
+ extern char &FinalizeISelID; -/// UnpackMachineBundles - This pass unpack machine instruction bundles. -extern char &UnpackMachineBundlesID; + /// UnpackMachineBundles - This pass unpack machine instruction bundles. + extern char &UnpackMachineBundlesID; -FunctionPass * -createUnpackMachineBundles(std::function Ftor); + FunctionPass * + createUnpackMachineBundles(std::function Ftor); -/// FinalizeMachineBundles - This pass finalize machine instruction -/// bundles (created earlier, e.g. during pre-RA scheduling). -extern char &FinalizeMachineBundlesID; + /// FinalizeMachineBundles - This pass finalize machine instruction + /// bundles (created earlier, e.g. during pre-RA scheduling). + extern char &FinalizeMachineBundlesID; -/// StackMapLiveness - This pass analyses the register live-out set of -/// stackmap/patchpoint intrinsics and attaches the calculated information to -/// the intrinsic for later emission to the StackMap. -extern char &StackMapLivenessID; + /// StackMapLiveness - This pass analyses the register live-out set of + /// stackmap/patchpoint intrinsics and attaches the calculated information to + /// the intrinsic for later emission to the StackMap. + extern char &StackMapLivenessID; -// MachineSanitizerBinaryMetadata - appends/finalizes sanitizer binary -// metadata after llvm SanitizerBinaryMetadata pass. -extern char &MachineSanitizerBinaryMetadataID; + // MachineSanitizerBinaryMetadata - appends/finalizes sanitizer binary + // metadata after llvm SanitizerBinaryMetadata pass. + extern char &MachineSanitizerBinaryMetadataID; -/// RemoveLoadsIntoFakeUses pass. -extern char &RemoveLoadsIntoFakeUsesID; + /// RemoveLoadsIntoFakeUses pass. + extern char &RemoveLoadsIntoFakeUsesID; -/// RemoveRedundantDebugValues pass. -extern char &RemoveRedundantDebugValuesID; + /// RemoveRedundantDebugValues pass. + extern char &RemoveRedundantDebugValuesID; -/// MachineCFGPrinter pass. -extern char &MachineCFGPrinterID; + /// MachineCFGPrinter pass. 
+ extern char &MachineCFGPrinterID; -/// LiveDebugValues pass -extern char &LiveDebugValuesID; + /// LiveDebugValues pass + extern char &LiveDebugValuesID; -/// InterleavedAccess Pass - This pass identifies and matches interleaved -/// memory accesses to target specific intrinsics. -/// -FunctionPass *createInterleavedAccessPass(); + /// InterleavedAccess Pass - This pass identifies and matches interleaved + /// memory accesses to target specific intrinsics. + /// + FunctionPass *createInterleavedAccessPass(); -/// InterleavedLoadCombines Pass - This pass identifies interleaved loads and -/// combines them into wide loads detectable by InterleavedAccessPass -/// -FunctionPass *createInterleavedLoadCombinePass(); + /// InterleavedLoadCombines Pass - This pass identifies interleaved loads and + /// combines them into wide loads detectable by InterleavedAccessPass + /// + FunctionPass *createInterleavedLoadCombinePass(); -/// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all -/// TLS variables for the emulated TLS model. -/// -ModulePass *createLowerEmuTLSPass(); + /// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all + /// TLS variables for the emulated TLS model. + /// + ModulePass *createLowerEmuTLSPass(); -/// This pass lowers the \@llvm.load.relative and \@llvm.objc.* intrinsics to -/// instructions. This is unsafe to do earlier because a pass may combine the -/// constant initializer into the load, which may result in an overflowing -/// evaluation. -ModulePass *createPreISelIntrinsicLoweringPass(); + /// This pass lowers the \@llvm.load.relative and \@llvm.objc.* intrinsics to + /// instructions. This is unsafe to do earlier because a pass may combine the + /// constant initializer into the load, which may result in an overflowing + /// evaluation. 
+ ModulePass *createPreISelIntrinsicLoweringPass(); -/// GlobalMerge - This pass merges internal (by default) globals into structs -/// to enable reuse of a base pointer by indexed addressing modes. -/// It can also be configured to focus on size optimizations only. -/// -Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, - bool OnlyOptimizeForSize = false, - bool MergeExternalByDefault = false, - bool MergeConstantByDefault = false, - bool MergeConstAggressiveByDefault = false); + /// GlobalMerge - This pass merges internal (by default) globals into structs + /// to enable reuse of a base pointer by indexed addressing modes. + /// It can also be configured to focus on size optimizations only. + /// + Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, + bool OnlyOptimizeForSize = false, + bool MergeExternalByDefault = false, + bool MergeConstantByDefault = false, + bool MergeConstAggressiveByDefault = false); -/// This pass splits the stack into a safe stack and an unsafe stack to -/// protect against stack-based overflow vulnerabilities. -FunctionPass *createSafeStackPass(); + /// This pass splits the stack into a safe stack and an unsafe stack to + /// protect against stack-based overflow vulnerabilities. + FunctionPass *createSafeStackPass(); -/// This pass detects subregister lanes in a virtual register that are used -/// independently of other lanes and splits them into separate virtual -/// registers. -extern char &RenameIndependentSubregsID; + /// This pass detects subregister lanes in a virtual register that are used + /// independently of other lanes and splits them into separate virtual + /// registers. + extern char &RenameIndependentSubregsID; -/// This pass is executed POST-RA to collect which physical registers are -/// preserved by given machine function. 
-FunctionPass *createRegUsageInfoCollector(); + /// This pass is executed POST-RA to collect which physical registers are + /// preserved by given machine function. + FunctionPass *createRegUsageInfoCollector(); -/// Return a MachineFunction pass that identifies call sites -/// and propagates register usage information of callee to caller -/// if available with PysicalRegisterUsageInfo pass. -FunctionPass *createRegUsageInfoPropPass(); + /// Return a MachineFunction pass that identifies call sites + /// and propagates register usage information of callee to caller + /// if available with PysicalRegisterUsageInfo pass. + FunctionPass *createRegUsageInfoPropPass(); -/// This pass performs software pipelining on machine instructions. -extern char &MachinePipelinerID; + /// This pass performs software pipelining on machine instructions. + extern char &MachinePipelinerID; -/// This pass frees the memory occupied by the MachineFunction. -FunctionPass *createFreeMachineFunctionPass(); + /// This pass frees the memory occupied by the MachineFunction. + FunctionPass *createFreeMachineFunctionPass(); -/// This pass performs merging similar functions globally. -ModulePass *createGlobalMergeFuncPass(); + /// This pass performs merging similar functions globally. + ModulePass *createGlobalMergeFuncPass(); -/// This pass performs outlining on machine instructions directly before -/// printing assembly. -ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true); + /// This pass performs outlining on machine instructions directly before + /// printing assembly. + ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true); -/// This pass expands the reduction intrinsics into sequences of shuffles. -FunctionPass *createExpandReductionsPass(); + /// This pass expands the reduction intrinsics into sequences of shuffles. 
+ FunctionPass *createExpandReductionsPass(); -// This pass replaces intrinsics operating on vector operands with calls to -// the corresponding function in a vector library (e.g., SVML, libmvec). -FunctionPass *createReplaceWithVeclibLegacyPass(); + // This pass replaces intrinsics operating on vector operands with calls to + // the corresponding function in a vector library (e.g., SVML, libmvec). + FunctionPass *createReplaceWithVeclibLegacyPass(); -// Expands large div/rem instructions. -FunctionPass *createExpandLargeDivRemPass(); + // Expands large div/rem instructions. + FunctionPass *createExpandLargeDivRemPass(); -// Expands large div/rem instructions. -FunctionPass *createExpandFpPass(); + // Expands large div/rem instructions. + FunctionPass *createExpandFpPass(); -// This pass expands memcmp() to load/stores. -FunctionPass *createExpandMemCmpLegacyPass(); + // This pass expands memcmp() to load/stores. + FunctionPass *createExpandMemCmpLegacyPass(); -/// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp -FunctionPass *createBreakFalseDeps(); + /// Creates Break False Dependencies pass. \see BreakFalseDeps.cpp + FunctionPass *createBreakFalseDeps(); -// This pass expands indirectbr instructions. -FunctionPass *createIndirectBrExpandPass(); + // This pass expands indirectbr instructions. + FunctionPass *createIndirectBrExpandPass(); -/// Creates CFI Fixup pass. \see CFIFixup.cpp -FunctionPass *createCFIFixup(); + /// Creates CFI Fixup pass. \see CFIFixup.cpp + FunctionPass *createCFIFixup(); -/// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp -FunctionPass *createCFIInstrInserter(); + /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp + FunctionPass *createCFIInstrInserter(); -/// Creates CFGuard longjmp target identification pass. -/// \see CFGuardLongjmp.cpp -FunctionPass *createCFGuardLongjmpPass(); + /// Creates CFGuard longjmp target identification pass. 
+ /// \see CFGuardLongjmp.cpp + FunctionPass *createCFGuardLongjmpPass(); -/// Creates Windows EH Continuation Guard target identification pass. -/// \see EHContGuardTargets.cpp -FunctionPass *createEHContGuardTargetsPass(); + /// Creates Windows EH Continuation Guard target identification pass. + /// \see EHContGuardTargets.cpp + FunctionPass *createEHContGuardTargetsPass(); -/// Create Hardware Loop pass. \see HardwareLoops.cpp -FunctionPass *createHardwareLoopsLegacyPass(); + /// Create Hardware Loop pass. \see HardwareLoops.cpp + FunctionPass *createHardwareLoopsLegacyPass(); -/// This pass inserts pseudo probe annotation for callsite profiling. -FunctionPass *createPseudoProbeInserter(); + /// This pass inserts pseudo probe annotation for callsite profiling. + FunctionPass *createPseudoProbeInserter(); -/// Create IR Type Promotion pass. \see TypePromotion.cpp -FunctionPass *createTypePromotionLegacyPass(); + /// Create IR Type Promotion pass. \see TypePromotion.cpp + FunctionPass *createTypePromotionLegacyPass(); -/// Add Flow Sensitive Discriminators. PassNum specifies the -/// sequence number of this pass (starting from 1). -FunctionPass * -createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); + /// Add Flow Sensitive Discriminators. PassNum specifies the + /// sequence number of this pass (starting from 1). + FunctionPass * + createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); -/// Read Flow Sensitive Profile. -FunctionPass * -createMIRProfileLoaderPass(std::string File, std::string RemappingFile, - sampleprof::FSDiscriminatorPass P, - IntrusiveRefCntPtr FS); + /// Read Flow Sensitive Profile. + FunctionPass * + createMIRProfileLoaderPass(std::string File, std::string RemappingFile, + sampleprof::FSDiscriminatorPass P, + IntrusiveRefCntPtr FS); -/// Creates MIR Debugify pass. \see MachineDebugify.cpp -ModulePass *createDebugifyMachineModulePass(); + /// Creates MIR Debugify pass. 
\see MachineDebugify.cpp + ModulePass *createDebugifyMachineModulePass(); -/// Creates MIR Strip Debug pass. \see MachineStripDebug.cpp -/// If OnlyDebugified is true then it will only strip debug info if it was -/// added by a Debugify pass. The module will be left unchanged if the debug -/// info was generated by another source such as clang. -ModulePass *createStripDebugMachineModulePass(bool OnlyDebugified); + /// Creates MIR Strip Debug pass. \see MachineStripDebug.cpp + /// If OnlyDebugified is true then it will only strip debug info if it was + /// added by a Debugify pass. The module will be left unchanged if the debug + /// info was generated by another source such as clang. + ModulePass *createStripDebugMachineModulePass(bool OnlyDebugified); -/// Creates MIR Check Debug pass. \see MachineCheckDebugify.cpp -ModulePass *createCheckDebugMachineModulePass(); + /// Creates MIR Check Debug pass. \see MachineCheckDebugify.cpp + ModulePass *createCheckDebugMachineModulePass(); -/// The pass fixups statepoint machine instruction to replace usage of -/// caller saved registers with stack slots. -extern char &FixupStatepointCallerSavedID; + /// The pass fixups statepoint machine instruction to replace usage of + /// caller saved registers with stack slots. + extern char &FixupStatepointCallerSavedID; -/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics -/// or split the data to two <128 x i32>. -FunctionPass *createX86LowerAMXTypePass(); + /// The pass transforms load/store <256 x i32> to AMX load/store intrinsics + /// or split the data to two <128 x i32>. + FunctionPass *createX86LowerAMXTypePass(); -/// The pass transforms amx intrinsics to scalar operation if the function has -/// optnone attribute or it is O0. -FunctionPass *createX86LowerAMXIntrinsicsPass(); + /// The pass transforms amx intrinsics to scalar operation if the function has + /// optnone attribute or it is O0. 
+ FunctionPass *createX86LowerAMXIntrinsicsPass(); -/// When learning an eviction policy, extract score(reward) information, -/// otherwise this does nothing -FunctionPass *createRegAllocScoringPass(); + /// When learning an eviction policy, extract score(reward) information, + /// otherwise this does nothing + FunctionPass *createRegAllocScoringPass(); -/// JMC instrument pass. -ModulePass *createJMCInstrumenterPass(); + /// JMC instrument pass. + ModulePass *createJMCInstrumenterPass(); -/// This pass converts conditional moves to conditional jumps when profitable. -FunctionPass *createSelectOptimizePass(); + /// This pass converts conditional moves to conditional jumps when profitable. + FunctionPass *createSelectOptimizePass(); -FunctionPass *createCallBrPass(); + FunctionPass *createCallBrPass(); -/// Lowers KCFI operand bundles for indirect calls. -FunctionPass *createKCFIPass(); -} // namespace llvm + /// Lowers KCFI operand bundles for indirect calls. + FunctionPass *createKCFIPass(); +} // End llvm namespace #endif diff --git a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td index 3e2744dea8d14..4faea18324cb7 100644 --- a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td +++ b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td @@ -172,15 +172,6 @@ def CA_Meta: Category<"Meta"> {} def CA_Subsidiary: Category<"Subsidiary"> {} def CA_Utility: Category<"Utility"> {} -class SourceLanguage { - string name = n; // Name of the enum value in enum class Association. -} - -// The C languages also implies C++ until there is a reason to add C++ -// separately. -def L_C : SourceLanguage<"C"> {} -def L_Fortran : SourceLanguage<"Fortran"> {} - // Information about a specific directive. class Directive { // Name of the directive. Can be composite directive sepearted by whitespace. @@ -214,7 +205,4 @@ class Directive { // The category of the directive. 
Category category = ?; - - // The languages that allow this directive. Default: all languages. - list languages = [L_C, L_Fortran]; } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 0af4b436649a3..194b1e657c493 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -573,7 +573,6 @@ def OMP_Allocators : Directive<"allocators"> { ]; let association = AS_Block; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_Assumes : Directive<"assumes"> { let association = AS_None; @@ -587,6 +586,10 @@ def OMP_Assumes : Directive<"assumes"> { VersionedClause, ]; } +def OMP_EndAssumes : Directive<"end assumes"> { + let association = AS_Delimited; + let category = OMP_Assumes.category; +} def OMP_Assume : Directive<"assume"> { let association = AS_Block; let category = CA_Informational; @@ -634,12 +637,6 @@ def OMP_BeginAssumes : Directive<"begin assumes"> { VersionedClause, VersionedClause, ]; - let languages = [L_C]; -} -def OMP_EndAssumes : Directive<"end assumes"> { - let association = AS_Delimited; - let category = OMP_BeginAssumes.category; - let languages = OMP_BeginAssumes.languages; } def OMP_BeginDeclareTarget : Directive<"begin declare target"> { let allowedClauses = [ @@ -650,22 +647,10 @@ def OMP_BeginDeclareTarget : Directive<"begin declare target"> { ]; let association = AS_Delimited; let category = CA_Declarative; - let languages = [L_C]; -} -def OMP_EndDeclareTarget : Directive<"end declare target"> { - let association = AS_Delimited; - let category = OMP_BeginDeclareTarget.category; - let languages = OMP_BeginDeclareTarget.languages; } def OMP_BeginDeclareVariant : Directive<"begin declare variant"> { let association = AS_Delimited; let category = CA_Declarative; - let languages = [L_C]; -} -def OMP_EndDeclareVariant : Directive<"end declare variant"> { - let association = AS_Delimited; - let category = OMP_BeginDeclareVariant.category; - let 
languages = OMP_BeginDeclareVariant.languages; } def OMP_Cancel : Directive<"cancel"> { let allowedOnceClauses = [ @@ -732,6 +717,10 @@ def OMP_DeclareTarget : Directive<"declare target"> { let association = AS_None; let category = CA_Declarative; } +def OMP_EndDeclareTarget : Directive<"end declare target"> { + let association = AS_Delimited; + let category = OMP_DeclareTarget.category; +} def OMP_DeclareVariant : Directive<"declare variant"> { let allowedClauses = [ VersionedClause, @@ -742,7 +731,10 @@ def OMP_DeclareVariant : Directive<"declare variant"> { ]; let association = AS_Declaration; let category = CA_Declarative; - let languages = [L_C]; +} +def OMP_EndDeclareVariant : Directive<"end declare variant"> { + let association = AS_Delimited; + let category = OMP_DeclareVariant.category; } def OMP_Depobj : Directive<"depobj"> { let allowedClauses = [ @@ -801,16 +793,15 @@ def OMP_Do : Directive<"do"> { ]; let association = AS_Loop; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_EndDo : Directive<"end do"> { let allowedOnceClauses = [ VersionedClause, ]; + // Needed for association computation, since OMP_Do has it "from leafConstructs". 
let leafConstructs = OMP_Do.leafConstructs; let association = OMP_Do.association; let category = OMP_Do.category; - let languages = OMP_Do.languages; } def OMP_Error : Directive<"error"> { let allowedClauses = [ @@ -850,7 +841,6 @@ def OMP_For : Directive<"for"> { ]; let association = AS_Loop; let category = CA_Executable; - let languages = [L_C]; } def OMP_Interchange : Directive<"interchange"> { let allowedOnceClauses = [ @@ -994,7 +984,6 @@ def OMP_EndScope : Directive<"end scope"> { let leafConstructs = OMP_Scope.leafConstructs; let association = OMP_Scope.association; let category = OMP_Scope.category; - let languages = [L_Fortran]; } def OMP_Section : Directive<"section"> { let association = AS_Separating; @@ -1019,7 +1008,6 @@ def OMP_EndSections : Directive<"end sections"> { let leafConstructs = OMP_Sections.leafConstructs; let association = OMP_Sections.association; let category = OMP_Sections.category; - let languages = [L_Fortran]; } def OMP_Simd : Directive<"simd"> { let allowedClauses = [ @@ -1064,7 +1052,6 @@ def OMP_EndSingle : Directive<"end single"> { let leafConstructs = OMP_Single.leafConstructs; let association = OMP_Single.association; let category = OMP_Single.category; - let languages = [L_Fortran]; } def OMP_Target : Directive<"target"> { let allowedClauses = [ @@ -1272,7 +1259,6 @@ def OMP_Workshare : Directive<"workshare"> { ]; let association = AS_Block; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_EndWorkshare : Directive<"end workshare"> { let allowedOnceClauses = [ @@ -1281,7 +1267,6 @@ def OMP_EndWorkshare : Directive<"end workshare"> { let leafConstructs = OMP_Workshare.leafConstructs; let association = OMP_Workshare.association; let category = OMP_Workshare.category; - let languages = [L_Fortran]; } //===----------------------------------------------------------------------===// @@ -1313,7 +1298,6 @@ def OMP_DistributeParallelDo : Directive<"distribute parallel do"> { ]; let leafConstructs = 
[OMP_Distribute, OMP_Parallel, OMP_Do]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_DistributeParallelDoSimd : Directive<"distribute parallel do simd"> { let allowedClauses = [ @@ -1340,7 +1324,6 @@ def OMP_DistributeParallelDoSimd : Directive<"distribute parallel do simd"> { ]; let leafConstructs = [OMP_Distribute, OMP_Parallel, OMP_Do, OMP_Simd]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_DistributeParallelFor : Directive<"distribute parallel for"> { let allowedClauses = [ @@ -1363,7 +1346,6 @@ def OMP_DistributeParallelFor : Directive<"distribute parallel for"> { ]; let leafConstructs = [OMP_Distribute, OMP_Parallel, OMP_For]; let category = CA_Executable; - let languages = [L_C]; } def OMP_DistributeParallelForSimd : Directive<"distribute parallel for simd"> { let allowedClauses = [ @@ -1391,7 +1373,6 @@ def OMP_DistributeParallelForSimd : Directive<"distribute parallel for simd"> { ]; let leafConstructs = [OMP_Distribute, OMP_Parallel, OMP_For, OMP_Simd]; let category = CA_Executable; - let languages = [L_C]; } def OMP_DistributeSimd : Directive<"distribute simd"> { let allowedClauses = [ @@ -1441,7 +1422,6 @@ def OMP_DoSimd : Directive<"do simd"> { ]; let leafConstructs = [OMP_Do, OMP_Simd]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_EndDoSimd : Directive<"end do simd"> { let allowedOnceClauses = [ @@ -1450,7 +1430,6 @@ def OMP_EndDoSimd : Directive<"end do simd"> { let leafConstructs = OMP_DoSimd.leafConstructs; let association = OMP_DoSimd.association; let category = OMP_DoSimd.category; - let languages = [L_Fortran]; } def OMP_ForSimd : Directive<"for simd"> { let allowedClauses = [ @@ -1632,7 +1611,6 @@ def OMP_ParallelDo : Directive<"parallel do"> { ]; let leafConstructs = [OMP_Parallel, OMP_Do]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_ParallelDoSimd : Directive<"parallel do simd"> { let allowedClauses = [ @@ -1661,7 +1639,6 @@ def 
OMP_ParallelDoSimd : Directive<"parallel do simd"> { ]; let leafConstructs = [OMP_Parallel, OMP_Do, OMP_Simd]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_ParallelFor : Directive<"parallel for"> { let allowedClauses = [ @@ -1685,7 +1662,6 @@ def OMP_ParallelFor : Directive<"parallel for"> { ]; let leafConstructs = [OMP_Parallel, OMP_For]; let category = CA_Executable; - let languages = [L_C]; } def OMP_ParallelForSimd : Directive<"parallel for simd"> { let allowedClauses = [ @@ -1713,7 +1689,6 @@ def OMP_ParallelForSimd : Directive<"parallel for simd"> { ]; let leafConstructs = [OMP_Parallel, OMP_For, OMP_Simd]; let category = CA_Executable; - let languages = [L_C]; } def OMP_parallel_loop : Directive<"parallel loop"> { let allowedClauses = [ @@ -1932,7 +1907,6 @@ def OMP_ParallelWorkshare : Directive<"parallel workshare"> { ]; let leafConstructs = [OMP_Parallel, OMP_Workshare]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_TargetParallel : Directive<"target parallel"> { let allowedClauses = [ @@ -1992,7 +1966,6 @@ def OMP_TargetParallelDo : Directive<"target parallel do"> { ]; let leafConstructs = [OMP_Target, OMP_Parallel, OMP_Do]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_TargetParallelDoSimd : Directive<"target parallel do simd"> { let allowedClauses = [ @@ -2026,7 +1999,6 @@ def OMP_TargetParallelDoSimd : Directive<"target parallel do simd"> { ]; let leafConstructs = [OMP_Target, OMP_Parallel, OMP_Do, OMP_Simd]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_TargetParallelFor : Directive<"target parallel for"> { let allowedClauses = [ @@ -2061,7 +2033,6 @@ def OMP_TargetParallelFor : Directive<"target parallel for"> { ]; let leafConstructs = [OMP_Target, OMP_Parallel, OMP_For]; let category = CA_Executable; - let languages = [L_C]; } def OMP_TargetParallelForSimd : Directive<"target parallel for simd"> { let allowedClauses = [ @@ -2100,7 +2071,6 @@ def 
OMP_TargetParallelForSimd : Directive<"target parallel for simd"> { ]; let leafConstructs = [OMP_Target, OMP_Parallel, OMP_For, OMP_Simd]; let category = CA_Executable; - let languages = [L_C]; } def OMP_target_parallel_loop : Directive<"target parallel loop"> { let allowedClauses = [ @@ -2260,10 +2230,8 @@ def OMP_TargetTeamsDistributeParallelDo : VersionedClause, VersionedClause, ]; - let leafConstructs = - [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_Do]; + let leafConstructs = [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_Do]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_TargetTeamsDistributeParallelDoSimd : Directive<"target teams distribute parallel do simd"> { @@ -2300,10 +2268,8 @@ def OMP_TargetTeamsDistributeParallelDoSimd : VersionedClause, VersionedClause, ]; - let leafConstructs = - [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_Do, OMP_Simd]; + let leafConstructs = [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_Do, OMP_Simd]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_TargetTeamsDistributeParallelFor : Directive<"target teams distribute parallel for"> { @@ -2337,10 +2303,8 @@ def OMP_TargetTeamsDistributeParallelFor : let allowedOnceClauses = [ VersionedClause, ]; - let leafConstructs = - [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For]; + let leafConstructs = [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For]; let category = CA_Executable; - let languages = [L_C]; } def OMP_TargetTeamsDistributeParallelForSimd : Directive<"target teams distribute parallel for simd"> { @@ -2379,10 +2343,8 @@ def OMP_TargetTeamsDistributeParallelForSimd : let allowedOnceClauses = [ VersionedClause, ]; - let leafConstructs = - [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For, OMP_Simd]; + let leafConstructs = [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For, OMP_Simd]; let category = CA_Executable; - let languages = 
[L_C]; } def OMP_TargetTeamsDistributeSimd : Directive<"target teams distribute simd"> { @@ -2532,7 +2494,6 @@ def OMP_TeamsDistributeParallelDo : ]; let leafConstructs = [OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_Do]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_TeamsDistributeParallelDoSimd : Directive<"teams distribute parallel do simd"> { @@ -2561,10 +2522,8 @@ def OMP_TeamsDistributeParallelDoSimd : VersionedClause, VersionedClause, ]; - let leafConstructs = - [OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_Do, OMP_Simd]; + let leafConstructs = [OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_Do, OMP_Simd]; let category = CA_Executable; - let languages = [L_Fortran]; } def OMP_TeamsDistributeParallelFor : Directive<"teams distribute parallel for"> { @@ -2590,7 +2549,6 @@ def OMP_TeamsDistributeParallelFor : ]; let leafConstructs = [OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For]; let category = CA_Executable; - let languages = [L_C]; } def OMP_TeamsDistributeParallelForSimd : Directive<"teams distribute parallel for simd"> { @@ -2618,10 +2576,8 @@ def OMP_TeamsDistributeParallelForSimd : VersionedClause, VersionedClause, ]; - let leafConstructs = - [OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For, OMP_Simd]; + let leafConstructs = [OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For, OMP_Simd]; let category = CA_Executable; - let languages = [L_C]; } def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> { let allowedClauses = [ diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index fba6f45d37d1d..d0dffa9de616a 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -554,23 +554,6 @@ class VectorType : public Type { return VectorType::get(VTy->getElementType(), EltCnt * 2); } - /// This static method attempts to construct a VectorType with the same - /// size-in-bits as SizeTy but with an element type that matches the scalar - /// type of EltTy. 
The VectorType is returned on success, nullptr otherwise. - static VectorType *getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy) { - if (SizeTy->getScalarType() == EltTy->getScalarType()) - return SizeTy; - - unsigned EltSize = EltTy->getScalarSizeInBits(); - if (!SizeTy->getPrimitiveSizeInBits().isKnownMultipleOf(EltSize)) - return nullptr; - - ElementCount EC = SizeTy->getElementCount() - .multiplyCoefficientBy(SizeTy->getScalarSizeInBits()) - .divideCoefficientBy(EltSize); - return VectorType::get(EltTy->getScalarType(), EC); - } - /// Return true if the specified type is valid as a element type. static bool isValidElementType(Type *ElemTy); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 7e0521e72ceb2..f650c06590ef2 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -608,33 +608,43 @@ class IRBuilderBase { /// Create and insert a memset to the specified pointer and the /// specified value. /// - /// If the pointer isn't an i8*, it will be converted. If alias metadata is - /// specified, it will be added to the instruction. + /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope + /// and noalias tags. 
CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()) { - return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, AAInfo); + MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr) { + return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, + TBAATag, ScopeTag, NoAliasTag); } CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, MaybeAlign Align, - bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()); + bool isVolatile = false, MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); CallInst *CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, Value *Val, Value *Size, bool IsVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()); + MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); /// Create and insert an element unordered-atomic memset of the region of /// memory starting at the given pointer to the given value. /// - /// If the pointer isn't an i8*, it will be converted. If alias metadata is - /// specified, it will be added to the instruction. - CallInst * - CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val, uint64_t Size, - Align Alignment, uint32_t ElementSize, - const AAMDNodes &AAInfo = AAMDNodes()) { - return CreateElementUnorderedAtomicMemSet( - Ptr, Val, getInt64(Size), Align(Alignment), ElementSize, AAInfo); + /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope + /// and noalias tags. 
+ CallInst *CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val, + uint64_t Size, Align Alignment, + uint32_t ElementSize, + MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr) { + return CreateElementUnorderedAtomicMemSet(Ptr, Val, getInt64(Size), + Align(Alignment), ElementSize, + TBAATag, ScopeTag, NoAliasTag); } CallInst *CreateMalloc(Type *IntPtrTy, Type *AllocTy, Value *AllocSize, @@ -652,72 +662,88 @@ class IRBuilderBase { /// Generate the IR for a call to the builtin free function. CallInst *CreateFree(Value *Source, ArrayRef Bundles = {}); - CallInst * - CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val, Value *Size, - Align Alignment, uint32_t ElementSize, - const AAMDNodes &AAInfo = AAMDNodes()); + CallInst *CreateElementUnorderedAtomicMemSet(Value *Ptr, Value *Val, + Value *Size, Align Alignment, + uint32_t ElementSize, + MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); /// Create and insert a memcpy between the specified pointers. /// - /// If the pointers aren't i8*, they will be converted. If alias metadata is - /// specified, it will be added to the instruction. + /// If the pointers aren't i8*, they will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. 
CallInst *CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, - bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()) { + bool isVolatile = false, MDNode *TBAATag = nullptr, + MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr) { return CreateMemCpy(Dst, DstAlign, Src, SrcAlign, getInt64(Size), - isVolatile, AAInfo); + isVolatile, TBAATag, TBAAStructTag, ScopeTag, + NoAliasTag); } - CallInst *CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, - MaybeAlign DstAlign, Value *Src, - MaybeAlign SrcAlign, Value *Size, - bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()); + CallInst *CreateMemTransferInst( + Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, + MaybeAlign SrcAlign, Value *Size, bool isVolatile = false, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); CallInst *CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, - bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()) { + bool isVolatile = false, MDNode *TBAATag = nullptr, + MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr) { return CreateMemTransferInst(Intrinsic::memcpy, Dst, DstAlign, Src, - SrcAlign, Size, isVolatile, AAInfo); + SrcAlign, Size, isVolatile, TBAATag, + TBAAStructTag, ScopeTag, NoAliasTag); } - CallInst *CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign, Value *Src, - MaybeAlign SrcAlign, Value *Size, - bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()) { + CallInst * + CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign, Value *Src, + MaybeAlign SrcAlign, Value *Size, bool isVolatile = false, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { return CreateMemTransferInst(Intrinsic::memcpy_inline, Dst, 
DstAlign, Src, - SrcAlign, Size, isVolatile, AAInfo); + SrcAlign, Size, isVolatile, TBAATag, + TBAAStructTag, ScopeTag, NoAliasTag); } /// Create and insert an element unordered-atomic memcpy between the /// specified pointers. /// - /// DstAlign/SrcAlign are the alignments of the Dst/Src pointers, - /// respectively. + /// DstAlign/SrcAlign are the alignments of the Dst/Src pointers, respectively. /// - /// If the pointers aren't i8*, they will be converted. If alias metadata is - /// specified, it will be added to the instruction. + /// If the pointers aren't i8*, they will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope + /// and noalias tags. CallInst *CreateElementUnorderedAtomicMemCpy( Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size, - uint32_t ElementSize, const AAMDNodes &AAInfo = AAMDNodes()); + uint32_t ElementSize, MDNode *TBAATag = nullptr, + MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); CallInst *CreateMemMove(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, - bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()) { + bool isVolatile = false, MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr) { return CreateMemMove(Dst, DstAlign, Src, SrcAlign, getInt64(Size), - isVolatile, AAInfo); + isVolatile, TBAATag, ScopeTag, NoAliasTag); } CallInst *CreateMemMove(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, - bool isVolatile = false, - const AAMDNodes &AAInfo = AAMDNodes()) { + bool isVolatile = false, MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr) { return CreateMemTransferInst(Intrinsic::memmove, Dst, DstAlign, Src, - SrcAlign, Size, isVolatile, AAInfo); + SrcAlign, Size, isVolatile, TBAATag, + /*TBAAStructTag=*/nullptr, ScopeTag, + NoAliasTag); } /// \brief Create and insert 
an element unordered-atomic memmove between the @@ -726,11 +752,14 @@ class IRBuilderBase { /// DstAlign/SrcAlign are the alignments of the Dst/Src pointers, /// respectively. /// - /// If the pointers aren't i8*, they will be converted. If alias metadata is - /// specified, it will be added to the instruction. + /// If the pointers aren't i8*, they will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope + /// and noalias tags. CallInst *CreateElementUnorderedAtomicMemMove( Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size, - uint32_t ElementSize, const AAMDNodes &AAInfo = AAMDNodes()); + uint32_t ElementSize, MDNode *TBAATag = nullptr, + MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); private: CallInst *getReductionIntrinsic(Intrinsic::ID ID, Value *Src); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e1a135a5ad48e..28450f03b7619 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -903,7 +903,7 @@ def int_stackrestore : DefaultAttrsIntrinsic<[], [llvm_anyptr_ty]>, def int_get_dynamic_area_offset : DefaultAttrsIntrinsic<[llvm_anyint_ty]>; -def int_thread_pointer : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>, +def int_thread_pointer : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_thread_pointer">; // IntrInaccessibleMemOrArgMemOnly is a little more pessimistic than strictly diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 67c47095076d8..0b26bb9829005 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -357,33 +357,38 @@ class MMA_SIGNATURE { !ne(A.ptx_elt_type, B.ptx_elt_type): [A, B], true: [A] ); - string ret = !foldl("", id_frags, a, b, !strconcat(a, "_", b.ptx_elt_type)); + string ret = !foldl("", id_frags, a, b, 
!strconcat(a, ".", b.ptx_elt_type)); } class WMMA_NAME { string signature = MMA_SIGNATURE.ret; - string record = "int_nvvm_wmma_" - # A.geom - # "_mma" - # !subst(".", "_", b1op) - # "_" # ALayout - # "_" # BLayout - # !if(!ne(Rnd, ""), !strconcat("_", Rnd), "") - # signature - # !if(Satfinite, "_satfinite", ""); + string llvm = "llvm.nvvm.wmma." + # A.geom + # ".mma" + # b1op + # "." # ALayout + # "." # BLayout + # !if(!ne(Rnd, ""), !strconcat(".", Rnd), "") + # signature + # !if(Satfinite, ".satfinite", ""); + + string record = !subst(".", "_", + !subst("llvm.", "int_", llvm)); } class MMA_NAME { string signature = MMA_SIGNATURE.ret; - string record = "int_nvvm_mma" - # !subst(".", "_", b1op) - # "_" # A.geom - # "_" # ALayout - # "_" # BLayout - # !if(Satfinite, "_satfinite", "") - # signature; + string llvm = "llvm.nvvm.mma" + # b1op + # "." # A.geom + # "." # ALayout + # "." # BLayout + # !if(Satfinite, ".satfinite", "") + # signature; + string record = !subst(".", "_", + !subst("llvm.", "int_", llvm)); } class LDMATRIX_NAME { @@ -691,6 +696,101 @@ class SHFL_INFO { [OpType, llvm_i32_ty, llvm_i32_ty]); } +class CP_ASYNC_BULK_TENSOR_G2S_INTR { + string Name = "int_nvvm_cp_async_bulk_tensor_g2s_" # mode # "_" # dim # "d"; + + bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0); + int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0); + list Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets); + list TensorDimsTy = !listsplat(llvm_i32_ty, dim); + list ArgsTy = !listconcat( + [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr + llvm_shared_ptr_ty, // mbarrier_smem_ptr + llvm_ptr_ty], // tensormap_ptr + TensorDimsTy, // actual tensor dims + Im2ColOffsetsTy, // im2col offsets + [llvm_i16_ty, // cta_mask + llvm_i64_ty, // cache_hint + llvm_i1_ty, // Flag for cta_mask + llvm_i1_ty] // Flag for cache_hint + ); + + int TempFlagsStartIdx = !add(dim, 5); + int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets); + list IntrProp = [IntrConvergent, + 
WriteOnly>, ReadOnly>, + NoCapture>, NoCapture>, NoCapture>, + ImmArg>, + ImmArg>]; +} + +class CP_ASYNC_BULK_TENSOR_S2G_INTR { + string Name = "int_nvvm_cp_async_bulk_tensor_s2g_" # mode # "_" # dim # "d"; + + list TensorDimsTy = !listsplat(llvm_i32_ty, dim); + list ArgsTy = !listconcat( + [llvm_shared_ptr_ty, // src_smem_ptr + llvm_ptr_ty], // tensormap_ptr + TensorDimsTy, // actual tensor dims + [llvm_i64_ty, // cache_hint + llvm_i1_ty] // Flag for cache_hint + ); + int FlagsStartIdx = !add(dim, 3); + list IntrProp = [IntrConvergent, + ReadOnly>, ReadOnly>, + NoCapture>, NoCapture>, + ImmArg>]; +} + +class CP_ASYNC_BULK_TENSOR_PREFETCH_INTR { + string Name = "int_nvvm_cp_async_bulk_tensor_prefetch_" # mode # "_" # dim # "d"; + + bit IsIm2Col = !if(!eq(mode, "im2col"), 1, 0); + int NumIm2ColOffsets = !if(IsIm2Col, !add(dim, -2), 0); + list Im2ColOffsetsTy = !listsplat(llvm_i16_ty, NumIm2ColOffsets); + list TensorDimsTy = !listsplat(llvm_i32_ty, dim); + list ArgsTy = !listconcat( + [llvm_ptr_ty], // tensormap_ptr + TensorDimsTy, // actual tensor dims + Im2ColOffsetsTy, // im2col offsets + [llvm_i64_ty, // cache_hint + llvm_i1_ty] // Flag for cache_hint + ); + + int TempFlagsStartIdx = !add(dim, 2); + int FlagsStartIdx = !add(TempFlagsStartIdx, NumIm2ColOffsets); + list IntrProp = [IntrConvergent, + ReadOnly>, NoCapture>, + ImmArg>]; +} + +class CP_ASYNC_BULK_TENSOR_REDUCE_INTR { + string Suffix = op # "_" # mode # "_" # dim # "d"; + string Name = "int_nvvm_cp_async_bulk_tensor_reduce_" # Suffix; + + list TensorDimsTy = !listsplat(llvm_i32_ty, dim); + list ArgsTy = !listconcat( + [llvm_shared_ptr_ty, // src_smem_ptr + llvm_ptr_ty], // tensormap_ptr + TensorDimsTy, // actual tensor dims + [llvm_i64_ty, // cache_hint + llvm_i1_ty] // Flag for cache_hint + ); + int FlagsStartIdx = !add(dim, 3); + list IntrProp = [IntrConvergent, + ReadOnly>, ReadOnly>, + NoCapture>, NoCapture>, + ImmArg>]; +} + +class NVVM_TCGEN05_LDST_NAME { + string intr = "llvm.nvvm.tcgen05." 
# Op + # "." # Shape + # "." # "x" # !shl(1, Num); + + string record = !subst(".", "_", + !subst("llvm.", "int_", intr)); +} class NVVM_TCGEN05_LDST_ACCESS_SIZE { int shift = !cond(!eq(Shape, "16x128b"): 1, !eq(Shape, "16x256b"): 2, @@ -710,28 +810,6 @@ class NVVM_TCGEN05_LDST_ACCESS_SIZE { true : llvm_void_ty); } -class TexVector types> { - string Name = name; - list Types = types; -} - -def TV_I8 : TexVector<"i8", [llvm_i16_ty]>; -def TV_I16 : TexVector<"i16", [llvm_i16_ty]>; -def TV_I32 : TexVector<"i32", [llvm_i32_ty]>; -def TV_I64 : TexVector<"i64", [llvm_i64_ty]>; -def TV_V2I8 : TexVector<"v2i8", !listsplat(llvm_i16_ty, 2)>; -def TV_V2I16 : TexVector<"v2i16", !listsplat(llvm_i16_ty, 2)>; -def TV_V2I32 : TexVector<"v2i32", !listsplat(llvm_i32_ty, 2)>; -def TV_V2I64 : TexVector<"v2i64", !listsplat(llvm_i64_ty, 2)>; -def TV_V4I8 : TexVector<"v4i8", !listsplat(llvm_i16_ty, 4)>; -def TV_V4I16 : TexVector<"v4i16", !listsplat(llvm_i16_ty, 4)>; -def TV_V4I32 : TexVector<"v4i32", !listsplat(llvm_i32_ty, 4)>; - - -def V4F32 : TexVector<"v4f32", !listsplat(llvm_float_ty, 4)>; -def V4S32 : TexVector<"v4s32", !listsplat(llvm_i32_ty, 4)>; -def V4U32 : TexVector<"v4u32", !listsplat(llvm_i32_ty, 4)>; - class NVVMBuiltin : ClangBuiltin { assert !eq(!substr(NAME, 0, !size("int_nvvm_")), "int_nvvm_"), @@ -750,116 +828,131 @@ let TargetPrefix = "nvvm" in { // // Min Max // - let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in { - foreach operation = ["min", "max"] in { - def int_nvvm_f # operation # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; - foreach variant = ["", "_xorsign_abs"] in { - foreach nan = ["", "_nan"] in { - foreach ftz = ["", "_ftz"] in { - def int_nvvm_f # operation # ftz # nan # variant # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; + foreach operation = ["min", "max"] in { + def int_nvvm_f # operation # _d : NVVMBuiltin, + 
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_f # operation # ftz # nan # variant # _f16 : - DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty]>; + foreach variant = ["", "_xorsign_abs"] in { + foreach nan = ["", "_nan"] in { + foreach ftz = ["", "_ftz"] in { + def int_nvvm_f # operation # ftz # nan # variant # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_f # operation # ftz # nan # variant # _f16x2 : - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty]>; + def int_nvvm_f # operation # ftz # nan # variant # _f16 : + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_f # operation # ftz # nan # variant # _bf16 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty]>; + def int_nvvm_f # operation # ftz # nan # variant # _f16x2 : + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_f # operation # ftz # nan # variant # _bf16x2 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty]>; - } // ftz - } // nan - } // variant - } // operation - } + def int_nvvm_f # operation # ftz # nan # variant # _bf16 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + + def int_nvvm_f # operation # ftz # nan # variant # _bf16x2 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + } // ftz + } // nan + } // variant + } // operation // // Multiplication // - let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in { - foreach sign = ["", "u"] in { - def 
int_nvvm_mulhi_ # sign # s : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty]>; - def int_nvvm_mulhi_ # sign # i : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; + foreach sign = ["", "u"] in { + def int_nvvm_mulhi_ # sign # s : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mulhi_ # sign # ll : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; + def int_nvvm_mulhi_ # sign # i : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - def int_nvvm_mul24_ # sign # i : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; - } + def int_nvvm_mulhi_ # sign # ll : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_mul_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; + def int_nvvm_mul24_ # sign # i : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + } - def int_nvvm_mul_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; - } + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_mul_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; + + def int_nvvm_mul_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; } // // Div // - let IntrProperties = [IntrNoMem] in { - foreach ftz = ["", "_ftz"] in { - def 
int_nvvm_div_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_div_full # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; - } + foreach ftz = ["", "_ftz"] in { + def int_nvvm_div_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_div_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; + def int_nvvm_div_full # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; + } - def int_nvvm_div_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; - } + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_div_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; + + def int_nvvm_div_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; } // // Sad // - let IntrProperties = [IntrNoMem, Commutative, IntrSpeculatable] in { - foreach sign = ["", "u"] in { - def int_nvvm_sad_ # sign # s : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty]>; - def int_nvvm_sad_ # sign # i : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; + foreach sign = ["", "u"] in { + def int_nvvm_sad_ # sign # s : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [IntrNoMem, Commutative, IntrSpeculatable]>; - def int_nvvm_sad_ # sign # ll : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty]>; - } + def int_nvvm_sad_ # sign # i : 
NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative, IntrSpeculatable]>; + + def int_nvvm_sad_ # sign # ll : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + [IntrNoMem, Commutative, IntrSpeculatable]>; } // // Floor Ceil // - let IntrProperties = [IntrNoMem, IntrSpeculatable] in { - foreach op = ["floor", "ceil"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_ # op # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_ # op # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + + foreach op = ["floor", "ceil"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_ # op # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_ # op # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; } // // Abs // + foreach ftz = ["", "_ftz"] in def int_nvvm_fabs # ftz : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], @@ -868,6 +961,7 @@ let TargetPrefix = "nvvm" in { // // Abs, Neg bf16, bf16x2 // + def int_nvvm_neg_bf16 : NVVMBuiltin, DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty], [IntrNoMem]>; def int_nvvm_neg_bf16x2 : NVVMBuiltin, @@ -876,65 +970,62 @@ let TargetPrefix = "nvvm" in { // // Round // - let IntrProperties = [IntrNoMem, IntrSpeculatable] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_round # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_round_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + foreach ftz = ["", "_ftz"] in + def int_nvvm_round # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; + + def int_nvvm_round_d : NVVMBuiltin, + 
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Trunc // - let IntrProperties = [IntrNoMem, IntrSpeculatable] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_trunc # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_trunc_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + foreach ftz = ["", "_ftz"] in + def int_nvvm_trunc # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; + + def int_nvvm_trunc_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Saturate // - let IntrProperties = [IntrNoMem, IntrSpeculatable] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_saturate # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_saturate_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + foreach ftz = ["", "_ftz"] in + def int_nvvm_saturate # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; + + def int_nvvm_saturate_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Exp2 Log2 // - let IntrProperties = [IntrNoMem] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_ex2_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_ex2_approx_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - def int_nvvm_ex2_approx_f16 : - DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty]>; - def int_nvvm_ex2_approx_f16x2 : - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty]>; + foreach ftz = ["", "_ftz"] in + def int_nvvm_ex2_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - foreach ftz = ["", 
"_ftz"] in - def int_nvvm_lg2_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; + def int_nvvm_ex2_approx_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + def int_nvvm_ex2_approx_f16 : + DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>; + def int_nvvm_ex2_approx_f16x2 : + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>; - def int_nvvm_lg2_approx_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + foreach ftz = ["", "_ftz"] in + def int_nvvm_lg2_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + + def int_nvvm_lg2_approx_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Sin Cos // + foreach op = ["sin", "cos"] in foreach ftz = ["", "_ftz"] in def int_nvvm_ # op # _approx # ftz # _f : NVVMBuiltin, @@ -943,103 +1034,105 @@ let TargetPrefix = "nvvm" in { // // Fma // - let IntrProperties = [IntrNoMem, IntrSpeculatable] in { - foreach variant = ["", "_sat", "_relu"] in { - foreach ftz = ["", "_ftz"] in { - def int_nvvm_fma_rn # ftz # variant # _f16 : - DefaultAttrsIntrinsic<[llvm_half_ty], - [llvm_half_ty, llvm_half_ty, llvm_half_ty]>; - - def int_nvvm_fma_rn # ftz # variant # _f16x2 : - DefaultAttrsIntrinsic<[llvm_v2f16_ty], - [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty]>; - - def int_nvvm_fma_rn # ftz # variant # _bf16 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_bfloat_ty], - [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty]>; - - def int_nvvm_fma_rn # ftz # variant # _bf16x2 : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2bf16_ty], - [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty]>; - } // ftz - } // variant - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_fma_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, 
llvm_float_ty]>; + foreach variant = ["", "_sat", "_relu"] in { + foreach ftz = ["", "_ftz"] in { + def int_nvvm_fma_rn # ftz # variant # _f16 : + DefaultAttrsIntrinsic<[llvm_half_ty], + [llvm_half_ty, llvm_half_ty, llvm_half_ty], + [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_fma_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], - [llvm_double_ty, llvm_double_ty, llvm_double_ty]>; - } + def int_nvvm_fma_rn # ftz # variant # _f16x2 : + DefaultAttrsIntrinsic<[llvm_v2f16_ty], + [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty], + [IntrNoMem, IntrSpeculatable]>; + + def int_nvvm_fma_rn # ftz # variant # _bf16 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_bfloat_ty], + [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty], + [IntrNoMem, IntrSpeculatable]>; + + def int_nvvm_fma_rn # ftz # variant # _bf16x2 : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2bf16_ty], + [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty], + [IntrNoMem, IntrSpeculatable]>; + } // ftz + } // variant + + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_fma_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; + + def int_nvvm_fma_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], + [llvm_double_ty, llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable]>; } // // Rcp // - let IntrProperties = [IntrNoMem] in { - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_rcp_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_rcp_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_rcp_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def 
int_nvvm_rcp_approx_ftz_f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_rcp_approx_ftz_d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; + def int_nvvm_rcp_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; } + def int_nvvm_rcp_approx_ftz_f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_nvvm_rcp_approx_ftz_d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + // // Sqrt // - let IntrProperties = [IntrNoMem] in { - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_sqrt_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_sqrt_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_nvvm_sqrt_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + def int_nvvm_sqrt_ # rnd # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + } - def int_nvvm_sqrt_f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; + def int_nvvm_sqrt_f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_sqrt_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - } + foreach ftz = ["", "_ftz"] in + def int_nvvm_sqrt_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; // // Rsqrt // - let IntrProperties = [IntrNoMem] in { - foreach ftz = ["", "_ftz"] in { - def int_nvvm_rsqrt_approx # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty]>; - def int_nvvm_rsqrt_approx # 
ftz # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty]>; - } + + foreach ftz = ["", "_ftz"] in { + def int_nvvm_rsqrt_approx # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_nvvm_rsqrt_approx # ftz # _d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; } // // Add // - let IntrProperties = [IntrNoMem, IntrSpeculatable, Commutative] in { - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_add_ # rnd # ftz # _f : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]>; + + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_add_ # rnd # ftz # _f : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_ # rnd # _d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty]>; - } + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; } // @@ -1098,134 +1191,135 @@ let TargetPrefix = "nvvm" in { // // Convert // - let IntrProperties = [IntrNoMem, IntrSpeculatable] in { - def int_nvvm_lohi_i2d : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty]>; - def int_nvvm_d2i_lo : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>; - def int_nvvm_d2i_hi : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>; + def int_nvvm_lohi_i2d : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; - foreach rnd = ["rn", "rz", "rm", "rp"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_d2f_ # rnd # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty]>; + def int_nvvm_d2i_lo : NVVMBuiltin, + 
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; + def int_nvvm_d2i_hi : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - foreach sign = ["", "u"] in { - def int_nvvm_d2 # sign # i_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty]>; + foreach rnd = ["rn", "rz", "rm", "rp"] in { + foreach ftz = ["", "_ftz"] in + def int_nvvm_d2f_ # rnd # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ # sign # i2d_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty]>; + foreach sign = ["", "u"] in { - foreach ftz = ["", "_ftz"] in - def int_nvvm_f2 # sign # i_ # rnd # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty]>; + def int_nvvm_d2 # sign # i_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ # sign # i2f_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty]>; + def int_nvvm_ # sign # i2d_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - foreach ftz = ["", "_ftz"] in - def int_nvvm_f2 # sign # ll_ # rnd # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty]>; + foreach ftz = ["", "_ftz"] in + def int_nvvm_f2 # sign # i_ # rnd # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_d2 # sign # ll_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty]>; + def int_nvvm_ # sign # i2f_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ # sign # ll2f_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty]>; + foreach ftz = ["", "_ftz"] in + def int_nvvm_f2 # sign # ll_ # rnd # 
ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ # sign # ll2d_ # rnd : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty]>; + def int_nvvm_d2 # sign # ll_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; - } // sign - } // rnd + def int_nvvm_ # sign # ll2f_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; - foreach ftz = ["", "_ftz"] in { - def int_nvvm_f2h_rn # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty]>; + def int_nvvm_ # sign # ll2d_ # rnd : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; + } // sign + } // rnd - def int_nvvm_bf2h_rn # ftz : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty]>; - } - } - let IntrProperties = [IntrNoMem, IntrNoCallback] in { - foreach rnd = ["rn", "rz"] in { - foreach relu = ["", "_relu"] in { - def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin, - Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>; + foreach ftz = ["", "_ftz"] in { + def int_nvvm_f2h_rn # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; - def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin, - Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>; + def int_nvvm_bf2h_rn # ftz : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>; + } - def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin, - Intrinsic<[llvm_bfloat_ty], [llvm_float_ty]>; - } - } + foreach rnd = ["rn", "rz"] in { + foreach relu = ["", "_relu"] in { + def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin, + Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - foreach satfinite = ["", "_satfinite"] in { - def int_nvvm_f2tf32_rna # satfinite : 
NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_float_ty]>; + def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin, + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - foreach rnd = ["rn", "rz"] in - foreach relu = ["", "_relu"] in - def int_nvvm_f2tf32_ # rnd # relu # satfinite : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_float_ty]>; + def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin, + Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; } + } - foreach type = ["e4m3x2", "e5m2x2"] in { - foreach relu = ["", "_relu"] in { - def int_nvvm_ff_to_ # type # _rn # relu : NVVMBuiltin, - Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; - def int_nvvm_f16x2_to_ # type # _rn # relu : NVVMBuiltin, - Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty]>; + foreach satfinite = ["", "_satfinite"] in { + def int_nvvm_f2tf32_rna # satfinite : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, - Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; - } - } + foreach rnd = ["rn", "rz"] in + foreach relu = ["", "_relu"] in + def int_nvvm_f2tf32_ # rnd # relu # satfinite : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + } - // FP4 conversions. 
+ foreach type = ["e4m3x2", "e5m2x2"] in { foreach relu = ["", "_relu"] in { - def int_nvvm_ff_to_e2m1x2_rn # relu # _satfinite : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; + def int_nvvm_ff_to_ # type # _rn # relu : NVVMBuiltin, + Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + + def int_nvvm_f16x2_to_ # type # _rn # relu : NVVMBuiltin, + Intrinsic<[llvm_i16_ty], [llvm_v2f16_ty], [IntrNoMem, IntrNoCallback]>; - def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; + def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, + Intrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; } + } - // FP6 conversions. - foreach type = ["e2m3x2", "e3m2x2"] in { - foreach relu = ["", "_relu"] in { - def int_nvvm_ff_to_ # type # _rn # relu # _satfinite : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; + // FP6 conversions. + foreach type = ["e2m3x2", "e3m2x2"] in { + foreach relu = ["", "_relu"] in { + def int_nvvm_ff_to_ # type # _rn # relu # _satfinite : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>; - } + def int_nvvm_ # type # _to_f16x2_rn # relu : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; } + } - // UE8M0x2 conversions. - foreach rmode = ["_rz", "_rp"] in { - foreach satmode = ["", "_satfinite"] in { - defvar suffix = rmode # satmode; - def int_nvvm_ff_to_ue8m0x2 # suffix : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty]>; + // FP4 conversions. 
+ foreach relu = ["", "_relu"] in { + def int_nvvm_ff_to_e2m1x2_rn # relu # _satfinite : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - def int_nvvm_bf16x2_to_ue8m0x2 # suffix : NVVMBuiltin, - DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty]>; + def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; + } + + // UE8M0x2 conversions. + foreach rmode = ["_rz", "_rp"] in { + foreach satmode = ["", "_satfinite"] in { + defvar suffix = rmode # satmode; + def int_nvvm_ff_to_ue8m0x2 # suffix : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; - } - } + def int_nvvm_bf16x2_to_ue8m0x2 # suffix : NVVMBuiltin, + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_v2bf16_ty], [IntrNoMem, IntrNoCallback]>; - def int_nvvm_ue8m0x2_to_bf16x2 : NVVMBuiltin, - Intrinsic<[llvm_v2bf16_ty], [llvm_i16_ty]>; + } } + def int_nvvm_ue8m0x2_to_bf16x2 : NVVMBuiltin, + Intrinsic<[llvm_v2bf16_ty], [llvm_i16_ty], [IntrNoMem, IntrNoCallback]>; + // FNS + def int_nvvm_fns : NVVMBuiltin, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -1329,16 +1423,14 @@ foreach scope = ["cta", "cluster", "gpu", "sys"] in { } // Async Copy -let IntrProperties = [IntrConvergent, IntrNoCallback] in { - def int_nvvm_cp_async_mbarrier_arrive : NVVMBuiltin, - Intrinsic<[],[llvm_ptr_ty]>; - def int_nvvm_cp_async_mbarrier_arrive_shared : NVVMBuiltin, - Intrinsic<[],[llvm_shared_ptr_ty]>; - def int_nvvm_cp_async_mbarrier_arrive_noinc : NVVMBuiltin, - Intrinsic<[],[llvm_ptr_ty]>; - def int_nvvm_cp_async_mbarrier_arrive_noinc_shared : NVVMBuiltin, - Intrinsic<[],[llvm_shared_ptr_ty]>; -} +def int_nvvm_cp_async_mbarrier_arrive : NVVMBuiltin, + Intrinsic<[], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>; +def int_nvvm_cp_async_mbarrier_arrive_shared : NVVMBuiltin, + 
Intrinsic<[], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>; +def int_nvvm_cp_async_mbarrier_arrive_noinc : NVVMBuiltin, + Intrinsic<[], [llvm_ptr_ty], [IntrConvergent, IntrNoCallback]>; +def int_nvvm_cp_async_mbarrier_arrive_noinc_shared : NVVMBuiltin, + Intrinsic<[], [llvm_shared_ptr_ty], [IntrConvergent, IntrNoCallback]>; multiclass CP_ASYNC_SHARED_GLOBAL { def NAME : Intrinsic<[], [llvm_shared_ptr_ty, llvm_global_ptr_ty], @@ -1416,11 +1508,15 @@ def int_nvvm_mbarrier_pending_count : NVVMBuiltin, // Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the // pointer's alignment. -let IntrProperties = [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>] in { - def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i32_ty]>; - def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], [llvm_anyptr_ty, llvm_i32_ty]>; - def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty]>; -} +def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], + [llvm_anyptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>]>; +def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>]>; +def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>]>; // Represents an explicit hole in the LLVM IR type system. It may be inserted by // the compiler in cases where a pointer is of the wrong type. 
In the backend @@ -1454,8 +1550,8 @@ def int_nvvm_texsurf_handle_internal : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty], [IntrNoMem]>; /// Error / Warn -def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty]>; -def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty]>; +def int_nvvm_compiler_error : Intrinsic<[], [llvm_anyptr_ty], []>; +def int_nvvm_compiler_warn : Intrinsic<[], [llvm_anyptr_ty], []>; def int_nvvm_reflect : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; @@ -1472,158 +1568,1792 @@ foreach i = 0...31 in DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef]>; - -foreach is_unified = [true, false] in { - defvar mode = !if(is_unified, "_unified", ""); - defvar addr_args = !if(is_unified, [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]); - - // Texture Fetch - foreach vec = [V4F32, V4S32, V4U32] in { - foreach is_array = [true, false] in { - defvar array = !if(is_array, "_array", ""); - defvar array_args = !if(is_array, [llvm_i32_ty], []); - - def int_nvvm_tex # mode # _1d # array # _ # vec.Name # _s32 - : Intrinsic; - def int_nvvm_tex # mode # _1d # array # _ # vec.Name # _f32 - : Intrinsic; - def int_nvvm_tex # mode # _1d # array # _level_ # vec.Name # _f32 - : Intrinsic; - def int_nvvm_tex # mode # _1d # array # _grad_ # vec.Name # _f32 - : Intrinsic; - - def int_nvvm_tex # mode # _2d # array # _ # vec.Name # _s32 - : Intrinsic; - def int_nvvm_tex # mode # _2d # array # _ # vec.Name # _f32 - : Intrinsic; - def int_nvvm_tex # mode # _2d # array # _level_ # vec.Name # _f32 - : Intrinsic; - def int_nvvm_tex # mode # _2d # array # _grad_ # vec.Name # _f32 - : Intrinsic; - - if !not(is_array) then { - def int_nvvm_tex # mode # _3d_ # vec.Name # _s32 - : Intrinsic; - def int_nvvm_tex # mode # _3d_ # vec.Name # _f32 - : Intrinsic; - def int_nvvm_tex # mode # _3d_level_ # vec.Name # _f32 - : Intrinsic; - def int_nvvm_tex # mode # _3d_grad_ # vec.Name # _f32 - : Intrinsic; - } - - def int_nvvm_tex # mode # _cube # 
array # _ # vec.Name # _f32 - : Intrinsic; - def int_nvvm_tex # mode # _cube # array # _level_ # vec.Name # _f32 - : Intrinsic; - - if is_unified then - def int_nvvm_tex # mode # _cube # array # _grad_ # vec.Name # _f32 - : Intrinsic; - } // is_array - - foreach comp = ["r", "g", "b", "a"] in { - def int_nvvm_tld4 # mode # _ # comp # _2d_ # vec.Name # _f32 - : Intrinsic; - } // comp - } // vec -} // is_unified - +// Texture Fetch +// texmode_independent +def int_nvvm_tex_1d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_tex_1d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_1d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_tex_1d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_1d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_tex_1d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, 
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; + +def int_nvvm_tex_1d_array_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_1d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_1d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_1d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_1d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_array_v4u32_s32 + : 
Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_1d_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; +def int_nvvm_tex_1d_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_1d_array_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_2d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_2d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def 
int_nvvm_tex_2d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_2d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_2d_array_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], []>; +def int_nvvm_tex_2d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_2d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_2d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + 
llvm_i32_ty], []>; +def int_nvvm_tex_2d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_2d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_2d_array_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], []>; +def int_nvvm_tex_2d_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_2d_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_2d_array_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; + +def int_nvvm_tex_3d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + []>; +def int_nvvm_tex_3d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_3d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, 
llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_3d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_3d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + []>; +def int_nvvm_tex_3d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_3d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_3d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_3d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + []>; +def int_nvvm_tex_3d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_3d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_3d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + 
llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_cube_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_cube_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + 
[llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_cube_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tld4_r_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_g_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_b_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_a_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_r_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_g_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_b_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_a_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_r_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_g_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_b_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_a_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +// texmode_unified +def int_nvvm_tex_unified_1d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_1d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_1d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_1d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; + +def int_nvvm_tex_unified_1d_array_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_1d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_1d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def 
int_nvvm_tex_unified_1d_array_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_1d_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_1d_array_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_unified_2d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def 
int_nvvm_tex_unified_2d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_tex_unified_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_unified_2d_array_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], []>; +def int_nvvm_tex_unified_2d_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], []>; +def 
int_nvvm_tex_unified_2d_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], []>; +def int_nvvm_tex_unified_2d_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_2d_array_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; + +def int_nvvm_tex_unified_3d_v4f32_s32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + []>; +def int_nvvm_tex_unified_3d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_3d_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; 
+def int_nvvm_tex_unified_3d_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_3d_v4s32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + []>; +def int_nvvm_tex_unified_3d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_3d_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_3d_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_3d_v4u32_s32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + []>; +def int_nvvm_tex_unified_3d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty], []>; +def int_nvvm_tex_unified_3d_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_3d_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_unified_cube_v4f32_f32 + : Intrinsic<[llvm_float_ty, 
llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_unified_cube_array_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_array_level_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_array_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_array_level_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_array_v4u32_f32 + : Intrinsic<[llvm_i32_ty, 
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_array_level_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_unified_cube_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tex_unified_cube_array_grad_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_array_grad_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tex_unified_cube_array_grad_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, + llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, 
llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + +def int_nvvm_tld4_unified_r_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_g_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_b_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_a_2d_v4f32_f32 + : Intrinsic<[llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_r_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_g_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_b_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_a_2d_v4s32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_r_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_g_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_b_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; +def int_nvvm_tld4_unified_a_2d_v4u32_f32 + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_float_ty], []>; //=== Surface Load -foreach clamp = ["clamp", "trap", "zero"] in { - foreach vec = [TV_I8, TV_I16, TV_I32, TV_I64, - TV_V2I8, TV_V2I16, TV_V2I32, TV_V2I64, - TV_V4I8, TV_V4I16, TV_V4I32] in { - - def int_nvvm_suld_1d_ # vec.Name # _ # clamp - : Intrinsic; - - def int_nvvm_suld_1d_array_ # vec.Name # _ # clamp - : Intrinsic; - - def int_nvvm_suld_2d_ # vec.Name # _ # clamp - : Intrinsic; - - def int_nvvm_suld_2d_array_ # vec.Name # _ # clamp - : Intrinsic; - - def int_nvvm_suld_3d_ # vec.Name # _ # clamp - : Intrinsic; - } // vec -} // clamp +// .clamp variants +def int_nvvm_suld_1d_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_1d_array_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def 
int_nvvm_suld_1d_array_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_2d_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, 
llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_2d_array_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def 
int_nvvm_suld_2d_array_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_3d_i8_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i16_clamp + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i32_clamp + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i64_clamp + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i64_clamp + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i8_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i16_clamp + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i32_clamp + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +// .trap variants +def int_nvvm_suld_1d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def 
int_nvvm_suld_1d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_1d_array_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def 
int_nvvm_suld_1d_array_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_2d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_2d_array_i8_trap + : 
Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_3d_i8_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i16_trap + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i32_trap + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i64_trap + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, 
llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i64_trap + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i8_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i16_trap + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i32_trap + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +// .zero variants +def int_nvvm_suld_1d_i8_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i16_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i32_zero + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], 
+ [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_v4i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_1d_array_i8_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i16_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i32_zero + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_1d_array_v4i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_2d_i8_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i16_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i32_zero + : 
Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_v4i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_2d_array_i8_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i16_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i32_zero + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + 
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_2d_array_v4i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + +def int_nvvm_suld_3d_i8_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i16_zero + : Intrinsic<[llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i32_zero + : Intrinsic<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_i64_zero + : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v2i64_zero + : Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i8_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i16_zero + : Intrinsic<[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], + 
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_suld_3d_v4i32_zero + : Intrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; //===- Texture Query ------------------------------------------------------===// foreach query = ["channel_order", "channel_data_type", "width", "height", - "depth", "array_size", "num_samples", "num_mipmap_levels"] in + "depth", "array_size", "num_samples", "num_mipmap_levels"] in { def int_nvvm_txq_ # query : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; +} //===- Surface Query ------------------------------------------------------===// -foreach query = ["channel_order", "channel_data_type", "width", "height", - "depth", "array_size"] in +foreach query = ["channel_order", "channel_data_type", "width", "height", + "depth", "array_size"] in { def int_nvvm_suq_ # query : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; +} //===- Handle Query -------------------------------------------------------===// -foreach type = ["sampler", "surface", "texture"] in +foreach type = ["sampler", "surface", "texture"] in { def int_nvvm_istypep_ # type : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i64_ty], [IntrNoMem]>; +} //===- Surface Stores -----------------------------------------------------===// -multiclass SurfaceStoreIntrinsics { - def _1d_ # vec.Name # _ # clamp : NVVMBuiltin, - Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty], vec.Types)>; - - def _1d_array_ # vec.Name # _ # clamp : NVVMBuiltin, - Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; - - def _2d_ # vec.Name # _ # clamp : NVVMBuiltin, - Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; - - def _2d_array_ # vec.Name # _ # clamp : NVVMBuiltin, - Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; - - def _3d_ # vec.Name # _ # clamp : NVVMBuiltin, - 
Intrinsic<[], !listconcat([llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], vec.Types)>; -} - // Unformatted -foreach clamp = ["clamp", "trap", "zero"] in - foreach vec = [TV_I8, TV_I16, TV_I32, TV_I64, - TV_V2I8, TV_V2I16, TV_V2I32, TV_V2I64, - TV_V4I8, TV_V4I16, TV_V4I32] in - defm int_nvvm_sust_b : SurfaceStoreIntrinsics; +// .clamp variant +def int_nvvm_sust_b_1d_i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_v2i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v2i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v2i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_v2i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_v4i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v4i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v4i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_array_v2i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v2i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v2i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_v2i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_array_v4i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v4i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v4i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_v2i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, 
llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v2i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v2i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_v2i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_v4i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v4i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v4i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_array_v2i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v2i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v2i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; 
+def int_nvvm_sust_b_2d_array_v2i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_array_v4i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v4i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v4i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_3d_v2i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v2i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v2i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_v2i64_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_3d_v4i8_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v4i16_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v4i32_clamp : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +// .trap variant +def int_nvvm_sust_b_1d_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_v2i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def 
int_nvvm_sust_b_1d_array_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_array_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_v2i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_array_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; +def 
int_nvvm_sust_b_2d_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_v2i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_array_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v2i32_trap : NVVMBuiltin, 
+ Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_v2i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_array_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_3d_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_v2i64_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def 
int_nvvm_sust_b_3d_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +// .zero variant +def int_nvvm_sust_b_1d_i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_v2i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v2i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v2i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_v2i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_v4i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v4i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_v4i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_i8_zero : NVVMBuiltin, + 
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_array_v2i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v2i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v2i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_1d_array_v2i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_1d_array_v4i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v4i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_1d_array_v4i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_i64_zero : NVVMBuiltin, + Intrinsic<[], 
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_v2i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v2i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v2i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_v2i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_v4i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v4i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_v4i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_array_v2i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v2i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, 
llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v2i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_2d_array_v2i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_2d_array_v4i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v4i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_2d_array_v4i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_3d_v2i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v2i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v2i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_b_3d_v2i64_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, 
llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty, llvm_i64_ty], []>; +def int_nvvm_sust_b_3d_v4i8_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v4i16_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_b_3d_v4i32_zero : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; // Formatted -foreach vec = [TV_I8, TV_I16, TV_I32, - TV_V2I8, TV_V2I16, TV_V2I32, - TV_V4I8, TV_V4I16, TV_V4I32] in - defm int_nvvm_sust_p : SurfaceStoreIntrinsics<"trap", vec>; + +def int_nvvm_sust_p_1d_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_1d_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_1d_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_1d_array_i8_trap : NVVMBuiltin, + 
Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_array_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_array_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_1d_array_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_array_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_array_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_1d_array_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_array_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_1d_array_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_2d_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_2d_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_v2i32_trap : NVVMBuiltin, + Intrinsic<[], 
[llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_2d_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i16_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_2d_array_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_array_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_array_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_2d_array_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_array_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_array_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_2d_array_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_array_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_2d_array_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_3d_i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_3d_i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_3d_i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_3d_v2i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_3d_v2i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_3d_v2i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], []>; +def int_nvvm_sust_p_3d_v4i8_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_3d_v4i16_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty], []>; +def int_nvvm_sust_p_3d_v4i32_trap : NVVMBuiltin, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; // Accessing special registers. @@ -1745,16 +3475,19 @@ def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32; // SHUFFLE // // Generate intrinsics for all variants of shfl instruction. 
-let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { - foreach sync = [false, true] in { - foreach mode = ["up", "down", "bfly", "idx"] in { - foreach type = ["i32", "f32"] in { - foreach return_pred = [false, true] in { - defvar i = SHFL_INFO; - if i.withGccBuiltin then - def i.Name : NVVMBuiltin, Intrinsic; - else - def i.Name : Intrinsic; +foreach sync = [false, true] in { + foreach mode = ["up", "down", "bfly", "idx"] in { + foreach type = ["i32", "f32"] in { + foreach return_pred = [false, true] in { + defvar i = SHFL_INFO; + if i.withGccBuiltin then { + def i.Name : NVVMBuiltin, + Intrinsic; + } else { + def i.Name : + Intrinsic; } } } @@ -1765,21 +3498,43 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] i // VOTE // -let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { - def int_nvvm_vote_all : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>; - def int_nvvm_vote_any : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>; - def int_nvvm_vote_uni : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i1_ty]>; - def int_nvvm_vote_ballot : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i1_ty]>; -} +// vote.all pred +def int_nvvm_vote_all : NVVMBuiltin, + Intrinsic<[llvm_i1_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// vote.any pred +def int_nvvm_vote_any : NVVMBuiltin, + Intrinsic<[llvm_i1_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// vote.uni pred +def int_nvvm_vote_uni : NVVMBuiltin, + Intrinsic<[llvm_i1_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// vote.ballot pred +def int_nvvm_vote_ballot : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; + // // VOTE.SYNC // -let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { - def int_nvvm_vote_all_sync : NVVMBuiltin, 
Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty]>; - def int_nvvm_vote_any_sync : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty]>; - def int_nvvm_vote_uni_sync : NVVMBuiltin, Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty]>; - def int_nvvm_vote_ballot_sync : NVVMBuiltin, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty]>; -} + +// vote.sync.all mask, pred +def int_nvvm_vote_all_sync : NVVMBuiltin, + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// vote.sync.any mask, pred +def int_nvvm_vote_any_sync : NVVMBuiltin, + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// vote.sync.uni mask, pred +def int_nvvm_vote_uni_sync : NVVMBuiltin, + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// vote.sync.ballot mask, pred +def int_nvvm_vote_ballot_sync : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i1_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; // // ACTIVEMASK @@ -1791,25 +3546,28 @@ def int_nvvm_activemask : NVVMBuiltin, // // MATCH.SYNC // -let IntrProperties = [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback] in { - // match.any.sync.b32 mask, value - def int_nvvm_match_any_sync_i32 : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; - // match.any.sync.b64 mask, value - def int_nvvm_match_any_sync_i64 : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty]>; - - // match.all instruction have two variants -- one returns a single value, another - // returns a pair {value, predicate}. We currently only implement the latter as - // that's the variant exposed by CUDA API. 
- - // match.all.sync.b32p mask, value - def int_nvvm_match_all_sync_i32p : - Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty]>; - // match.all.sync.b64p mask, value - def int_nvvm_match_all_sync_i64p : - Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty]>; -} +// match.any.sync.b32 mask, value +def int_nvvm_match_any_sync_i32 : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// match.any.sync.b64 mask, value +def int_nvvm_match_any_sync_i64 : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; + +// match.all instruction have two variants -- one returns a single value, another +// returns a pair {value, predicate}. We currently only implement the latter as +// that's the variant exposed by CUDA API. + +// match.all.sync.b32p mask, value +def int_nvvm_match_all_sync_i32p : + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; +// match.all.sync.b64p mask, value +def int_nvvm_match_all_sync_i64p : + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrInaccessibleMemOnly, IntrConvergent, IntrNoCallback]>; + // // ELECT.SYNC // @@ -1823,17 +3581,21 @@ def int_nvvm_elect_sync : // // redux.sync.op.u32 dst, src, membermask; -let IntrProperties = [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback] in { - foreach op = ["umin", "umax", "add", "min", "max", "and", "xor", "or"] in - def int_nvvm_redux_sync_ # op : NVVMBuiltin, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>; - - // redux.sync.op.{abs}.{NaN}.f32 dst, src, membermask; - foreach binOp = ["min", "max"] in - foreach abs = ["", "_abs"] in - foreach NaN = ["", "_NaN"] in - def int_nvvm_redux_sync_f # binOp # abs # NaN : NVVMBuiltin, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty]>; +foreach op = ["umin", "umax", 
"add", "min", "max", "and", "xor", "or"] in { + def int_nvvm_redux_sync_ # op : NVVMBuiltin, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; +} + +// redux.sync.op.{abs}.{NaN}.f32 dst, src, membermask; +foreach binOp = ["min", "max"] in { + foreach abs = ["", "_abs"] in { + foreach NaN = ["", "_NaN"] in { + def int_nvvm_redux_sync_f # binOp # abs # NaN : NVVMBuiltin, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], + [IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>; + } + } } // @@ -1885,10 +3647,13 @@ foreach layout = ["row", "col"] in { } // WMMA.MMA -class NVVM_MMA +class NVVM_WMMA_MMA : Intrinsic; + [IntrNoMem, IntrNoCallback], + WMMA_NAME.llvm>; foreach layout_a = ["row", "col"] in { foreach layout_b = ["row", "col"] in { @@ -1899,7 +3664,8 @@ foreach layout_a = ["row", "col"] in { if NVVM_WMMA_SUPPORTED.ret then { def WMMA_NAME.record - : NVVM_MMA; + : NVVM_WMMA_MMA; } } // b1op } // op @@ -1908,6 +3674,14 @@ foreach layout_a = ["row", "col"] in { } // layout_b } // layout_a +// MMA +class NVVM_MMA + : Intrinsic.llvm>; + foreach layout_a = ["row", "col"] in { foreach layout_b = ["row", "col"] in { foreach satf = [0, 1] in { @@ -1915,7 +3689,7 @@ foreach layout_a = ["row", "col"] in { foreach b1op = NVVM_MMA_B1OPS.ret in { if NVVM_MMA_SUPPORTED.ret then { def MMA_NAME.record - : NVVM_MMA; + : NVVM_MMA; } } // b1op } // op @@ -1939,22 +3713,18 @@ foreach transposed = [0, 1] in { } } -// MAPA -let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture>] in { - def int_nvvm_mapa - : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty]>; - def int_nvvm_mapa_shared_cluster - : DefaultAttrsIntrinsic<[llvm_shared_cluster_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty]>; -} - -// GETCTARANK -let IntrProperties = [IntrNoMem, IntrSpeculatable, NoCapture>] in { - def int_nvvm_getctarank - : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; - def 
int_nvvm_getctarank_shared_cluster - : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty]>; -} - +def int_nvvm_mapa + : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>]>; +def int_nvvm_mapa_shared_cluster + : DefaultAttrsIntrinsic<[llvm_shared_cluster_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>]>; +def int_nvvm_getctarank + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>]>; +def int_nvvm_getctarank_shared_cluster + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty], + [IntrNoMem, IntrSpeculatable, NoCapture>]>; def int_nvvm_is_explicit_cluster : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef], @@ -1970,123 +3740,87 @@ foreach op = ["dec", "inc"] in def int_nvvm_exit : NVVMBuiltin, Intrinsic<[], [], [IntrConvergent, IntrInaccessibleMemOnly, IntrNoReturn]>; -class DefaultAttrsIntrinsicFlags ret_types, - list param_types, - list flags, - list intr_properties> - : DefaultAttrsIntrinsic< - ret_types, - !listconcat(param_types, flags), - !listconcat(intr_properties, - !foreach(i, !range(flags), - ImmArg>))>; - // Intrinsics for Tensor Copy using TMA // G2S -> From Global to Shared memory variants // S2G -> From Shared to Global memory variants -foreach dim = 1...5 in { - defvar tensor_dim_args = !listsplat(llvm_i32_ty, dim); - +foreach dim = [1, 2, 3, 4, 5] in { foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { - defvar is_im2col = !eq(mode, "im2col"); - defvar num_im2col_offsets = !if(is_im2col, !add(dim, -2), 0); - defvar im2col_offsets_args = !listsplat(llvm_i16_ty, num_im2col_offsets); - - def int_nvvm_cp_async_bulk_tensor_g2s_ # mode # _ # dim # d : - DefaultAttrsIntrinsicFlags<[], - !listconcat([llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr - llvm_shared_ptr_ty, // mbarrier_smem_ptr - llvm_ptr_ty], // tensormap_ptr - tensor_dim_args, // actual 
tensor dims - im2col_offsets_args, // im2col offsets - [llvm_i16_ty, // cta_mask - llvm_i64_ty]), // cache_hint - [llvm_i1_ty, // Flag for cta_mask - llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, - WriteOnly>, ReadOnly>, - NoCapture>, NoCapture>, NoCapture>]>; - - def int_nvvm_cp_async_bulk_tensor_s2g_ # mode # _ # dim # d : - DefaultAttrsIntrinsicFlags<[], - !listconcat([llvm_shared_ptr_ty, // src_smem_ptr - llvm_ptr_ty], // tensormap_ptr - tensor_dim_args, // actual tensor dims - [llvm_i64_ty]), // cache_hint - [llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, - ReadOnly>, ReadOnly>, - NoCapture>, NoCapture>]>; - - def int_nvvm_cp_async_bulk_tensor_prefetch_ # mode # _ # dim # d : - DefaultAttrsIntrinsicFlags<[], - !listconcat([llvm_ptr_ty], // tensormap_ptr - tensor_dim_args, // actual tensor dims - im2col_offsets_args, // im2col offsets - [llvm_i64_ty]), // cache_hint - [llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, - ReadOnly>, NoCapture>]>; - - // Intrinsics for TMA Copy with reduction - foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in - def int_nvvm_cp_async_bulk_tensor_reduce_ # red_op # _ # mode # _ # dim # d : - DefaultAttrsIntrinsicFlags<[], - !listconcat([llvm_shared_ptr_ty, // src_smem_ptr - llvm_ptr_ty], // tensormap_ptr - tensor_dim_args, // actual tensor dims - [llvm_i64_ty]), // cache_hint - [llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, ReadOnly>, ReadOnly>, - NoCapture>, NoCapture>]>; + foreach g2s = [CP_ASYNC_BULK_TENSOR_G2S_INTR] in + def g2s.Name : DefaultAttrsIntrinsic<[], g2s.ArgsTy, g2s.IntrProp>; + foreach s2g = [CP_ASYNC_BULK_TENSOR_S2G_INTR] in + def s2g.Name : DefaultAttrsIntrinsic<[], s2g.ArgsTy, s2g.IntrProp>; + foreach prefetch = [CP_ASYNC_BULK_TENSOR_PREFETCH_INTR] in + def prefetch.Name : DefaultAttrsIntrinsic<[], prefetch.ArgsTy, prefetch.IntrProp>; } } -// Intrinsics for Prefetch and Prefetchu -let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture>] in { - foreach 
level = ["L1", "L2"] in { - def int_nvvm_prefetch_ # level : Intrinsic<[], [llvm_ptr_ty]>; - def int_nvvm_prefetch_global_ # level : Intrinsic<[], [llvm_global_ptr_ty]>; - def int_nvvm_prefetch_local_ # level : Intrinsic<[], [llvm_local_ptr_ty]>; +// Intrinsics for TMA Copy with reduction +foreach dim = [1, 2, 3, 4, 5] in { + foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { + foreach red_op = ["add", "min", "max", "inc", "dec", "and", "or", "xor"] in { + foreach reduce = [CP_ASYNC_BULK_TENSOR_REDUCE_INTR] in + def reduce.Name : DefaultAttrsIntrinsic<[], reduce.ArgsTy, reduce.IntrProp>; + } } - - foreach eviction_priority = ["evict_normal", "evict_last"] in - def int_nvvm_prefetch_global_L2_ # eviction_priority : Intrinsic<[], [llvm_global_ptr_ty]>; - - def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty]>; -} - -// applypriority -let IntrProperties = [IntrArgMemOnly, ReadOnly>, NoCapture>, - ImmArg>] in { - def int_nvvm_applypriority_global_L2_evict_normal - : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty]>; - - def int_nvvm_applypriority_L2_evict_normal - : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty]>; } -// discard -let IntrProperties = [NoCapture>, ImmArg>, IntrHasSideEffects] in { - def int_nvvm_discard_global_L2 : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty]>; - def int_nvvm_discard_L2 : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty]>; -} +// Intrinsics for Prefetch and Prefetchu +def int_nvvm_prefetch_L1 : Intrinsic<[], [llvm_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_nvvm_prefetch_L2 : Intrinsic<[], [llvm_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_nvvm_prefetch_global_L1 : Intrinsic<[], [llvm_global_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_nvvm_prefetch_global_L2 : Intrinsic<[], [llvm_global_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_nvvm_prefetch_local_L1 : Intrinsic<[], [llvm_local_ptr_ty], + [IntrArgMemOnly, 
ReadOnly>, NoCapture>]>; +def int_nvvm_prefetch_local_L2 : Intrinsic<[], [llvm_local_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; + +def int_nvvm_prefetch_global_L2_evict_normal : Intrinsic<[], [llvm_global_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_nvvm_prefetch_global_L2_evict_last : Intrinsic<[], [llvm_global_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; +def int_nvvm_prefetchu_L1 : Intrinsic<[], [llvm_ptr_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>]>; + +def int_nvvm_applypriority_global_L2_evict_normal + : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, llvm_i64_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>, + ImmArg>]>; + +def int_nvvm_applypriority_L2_evict_normal + : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty], + [IntrArgMemOnly, ReadOnly>, NoCapture>, + ImmArg>]>; + +// Intrinsics for discard +def int_nvvm_discard_global_L2 : DefaultAttrsIntrinsic<[], + [llvm_global_ptr_ty, llvm_i64_ty], [NoCapture>, + ImmArg>, IntrHasSideEffects]>; + +def int_nvvm_discard_L2 : DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_i64_ty], [NoCapture>, + ImmArg>, IntrHasSideEffects]>; // Intrinsics for Bulk Copy using TMA (non-tensor) // From Global to Shared Cluster def int_nvvm_cp_async_bulk_global_to_shared_cluster - : DefaultAttrsIntrinsicFlags<[], + : DefaultAttrsIntrinsic<[], [llvm_shared_cluster_ptr_ty, // dst_shared_cluster_ptr llvm_shared_ptr_ty, // mbarrier_ptr llvm_global_ptr_ty, // src_gmem_ptr llvm_i32_ty, // copy_size llvm_i16_ty, // cta_mask - llvm_i64_ty], // cache_hint - [llvm_i1_ty, // Flag for cta_mask + llvm_i64_ty, // cache_hint + llvm_i1_ty, // Flag for cta_mask llvm_i1_ty], // Flag for cache_hint [IntrConvergent, IntrArgMemOnly, WriteOnly>, ReadOnly>, - NoCapture>, NoCapture>, NoCapture>]>; + NoCapture>, NoCapture>, + NoCapture>, ImmArg>, + ImmArg>]>; // From Shared CTA to Shared Cluster def int_nvvm_cp_async_bulk_shared_cta_to_cluster @@ -2102,38 +3836,27 @@ def int_nvvm_cp_async_bulk_shared_cta_to_cluster // 
From Shared CTA to Global memory def int_nvvm_cp_async_bulk_shared_cta_to_global - : DefaultAttrsIntrinsicFlags<[], - [llvm_global_ptr_ty, // dst_gmem_ptr - llvm_shared_ptr_ty, // src_smem_ptr - llvm_i32_ty, // copy_size - llvm_i64_ty], // cache_hint - [llvm_i1_ty], // Flag for cache_hint - [IntrConvergent, IntrArgMemOnly, - WriteOnly>, ReadOnly>, - NoCapture>, NoCapture>]>; - -// From Shared CTA to Global memory with bytemask -def int_nvvm_cp_async_bulk_shared_cta_to_global_bytemask : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, // dst_gmem_ptr llvm_shared_ptr_ty, // src_smem_ptr llvm_i32_ty, // copy_size llvm_i64_ty, // cache_hint - llvm_i1_ty, // Flag for cache_hint - llvm_i16_ty], // byte_mask + llvm_i1_ty], // Flag for cache_hint [IntrConvergent, IntrArgMemOnly, WriteOnly>, ReadOnly>, + NoCapture>, NoCapture>, ImmArg>]>; // Intrinsics for Bulk Copy Prefetch L2 def int_nvvm_cp_async_bulk_prefetch_L2 - : DefaultAttrsIntrinsicFlags<[], + : DefaultAttrsIntrinsic<[], [llvm_global_ptr_ty, // src_gmem_ptr llvm_i32_ty, // copy_size - llvm_i64_ty], // cache_hint - [llvm_i1_ty], // Flag for cache_hint + llvm_i64_ty, // cache_hint + llvm_i1_ty], // Flag for cache_hint [IntrConvergent, IntrArgMemOnly, - NoCapture>, ReadOnly>]>; + NoCapture>, ReadOnly>, + ImmArg>]>; def int_nvvm_griddepcontrol_launch_dependents : Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_nvvm_griddepcontrol_wait : Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; @@ -2232,7 +3955,8 @@ class NVVM_TCGEN05_LD : !listconcat([IntrConvergent, IntrArgMemOnly, NoCapture>], !if(!eq(Shape, "16x32bx2"), [ImmArg>, ImmArg>], - [ImmArg>]))>; + [ImmArg>])), + NVVM_TCGEN05_LDST_NAME<"ld", Shape, Num>.intr>; // Tcgen05 st intrinsics class NVVM_TCGEN05_ST : @@ -2244,28 +3968,32 @@ class NVVM_TCGEN05_ST : !listconcat([IntrConvergent, IntrArgMemOnly, NoCapture>], !if(!eq(Shape, "16x32bx2"), [ImmArg>, ImmArg>], - [ImmArg>]))>; + [ImmArg>])), + NVVM_TCGEN05_LDST_NAME<"st", Shape, Num>.intr>; foreach shape 
= ["16x64b", "16x128b", "16x256b", "32x32b", "16x32bx2"] in { - foreach num = 0...8 in { + foreach num = !range(0, 8) in { if NVVM_TCGEN05_LDST_ACCESS_SIZE.valid then { - def int_nvvm_tcgen05_ld_ # shape # _x # !shl(1, num) : + def NVVM_TCGEN05_LDST_NAME<"ld", shape, num>.record : NVVM_TCGEN05_LD; - def int_nvvm_tcgen05_st_ # shape # _x # !shl(1, num) : + def NVVM_TCGEN05_LDST_NAME<"st", shape, num>.record : NVVM_TCGEN05_ST; - } + } } } // // Bulk store intrinsics // -let IntrProperties = [IntrArgMemOnly, IntrWriteMem, WriteOnly>, - NoCapture>, ImmArg>] in { - def int_nvvm_st_bulk : - DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty]>; - def int_nvvm_st_bulk_shared_cta : - DefaultAttrsIntrinsic<[], [llvm_shared_ptr_ty, llvm_i64_ty, llvm_i64_ty]>; -} +def int_nvvm_st_bulk : DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], + [IntrArgMemOnly, IntrWriteMem, + WriteOnly>, NoCapture>, ImmArg>]>; + +def int_nvvm_st_bulk_shared_cta : DefaultAttrsIntrinsic<[], + [llvm_shared_ptr_ty, llvm_i64_ty, llvm_i64_ty], + [IntrArgMemOnly, IntrWriteMem, + WriteOnly>, NoCapture>, ImmArg>]>; + } // let TargetPrefix = "nvvm" diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 75fb41fcd381a..18b2883eb00e7 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1891,4 +1891,3 @@ let TargetPrefix = "riscv" in { include "llvm/IR/IntrinsicsRISCVXTHead.td" include "llvm/IR/IntrinsicsRISCVXsf.td" include "llvm/IR/IntrinsicsRISCVXCV.td" -include "llvm/IR/IntrinsicsRISCVXAndes.td" diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td b/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td deleted file mode 100644 index d90fe2cd0e6f3..0000000000000 --- a/llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td +++ /dev/null @@ -1,17 +0,0 @@ -//===- IntrinsicsRISCVXAndes.td - Andes intrinsics ---------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License 
v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the Andes vendor intrinsics for RISC-V. -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "riscv" in { - // Andes Vector Packed FP16 Extension - defm nds_vfpmadt : RISCVBinaryAAXRoundingMode; - defm nds_vfpmadb : RISCVBinaryAAXRoundingMode; -} diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 8d984d6ce58df..404467781b4d0 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -119,11 +119,6 @@ let TargetPrefix = "spv" in { [llvm_any_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem]>; - def int_spv_resource_handlefromimplicitbinding - : DefaultAttrsIntrinsic< - [llvm_any_ty], - [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], - [IntrNoMem]>; def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h index 42fe225709ef8..570531e6b9e92 100644 --- a/llvm/include/llvm/Support/BranchProbability.h +++ b/llvm/include/llvm/Support/BranchProbability.h @@ -77,9 +77,7 @@ class BranchProbability { LLVM_ABI raw_ostream &print(raw_ostream &OS) const; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + LLVM_ABI void dump() const; /// Scale a large integer. 
/// diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h index 3bb238e7df2ed..dd446f280a483 100644 --- a/llvm/include/llvm/Support/ConvertUTF.h +++ b/llvm/include/llvm/Support/ConvertUTF.h @@ -346,10 +346,6 @@ LLVM_ABI bool convertUTF32ToUTF8String(ArrayRef Src, std::string &Out); LLVM_ABI bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl &DstUTF16); -bool IsSingleCodeUnitUTF8Codepoint(unsigned); -bool IsSingleCodeUnitUTF16Codepoint(unsigned); -bool IsSingleCodeUnitUTF32Codepoint(unsigned); - #if defined(_WIN32) namespace sys { namespace windows { diff --git a/llvm/include/llvm/Support/DebugCounter.h b/llvm/include/llvm/Support/DebugCounter.h index 9611586a92c3b..529a9f86f2e34 100644 --- a/llvm/include/llvm/Support/DebugCounter.h +++ b/llvm/include/llvm/Support/DebugCounter.h @@ -119,10 +119,8 @@ class DebugCounter { Counter.CurrChunkIdx = State.ChunkIdx; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // Dump or print the current counter set into llvm::dbgs(). - LLVM_DUMP_METHOD void dump() const; -#endif + LLVM_ABI LLVM_DUMP_METHOD void dump() const; LLVM_ABI void print(raw_ostream &OS) const; diff --git a/llvm/include/llvm/Support/FileOutputBuffer.h b/llvm/include/llvm/Support/FileOutputBuffer.h index 20999f408e325..d5b731522c11e 100644 --- a/llvm/include/llvm/Support/FileOutputBuffer.h +++ b/llvm/include/llvm/Support/FileOutputBuffer.h @@ -32,8 +32,9 @@ class FileOutputBuffer { /// Set the 'x' bit on the resulting file. F_executable = 1, - /// Use mmap for in-memory file buffer. - F_mmap = 2, + /// Don't use mmap and instead write an in-memory buffer to a file when this + /// buffer is closed. 
+ F_no_mmap = 2, }; /// Factory method to create an OutputBuffer object which manages a read/write diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index e8dc1c2422646..6a14328d431a4 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -513,10 +513,7 @@ struct KnownBits { bool operator!=(const KnownBits &Other) const { return !(*this == Other); } LLVM_ABI void print(raw_ostream &OS) const; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + LLVM_ABI void dump() const; private: // Internal helper for getting the initial KnownBits for an `srem` or `urem` diff --git a/llvm/include/llvm/Support/SMTAPI.h b/llvm/include/llvm/Support/SMTAPI.h index aed6241219c39..f1bb86cf81f1c 100644 --- a/llvm/include/llvm/Support/SMTAPI.h +++ b/llvm/include/llvm/Support/SMTAPI.h @@ -71,9 +71,7 @@ class SMTSort { virtual void print(raw_ostream &OS) const = 0; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + LLVM_ABI LLVM_DUMP_METHOD void dump() const; protected: /// Query the SMT solver and returns true if two sorts are equal (same kind @@ -120,9 +118,7 @@ class SMTExpr { virtual void print(raw_ostream &OS) const = 0; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + LLVM_ABI LLVM_DUMP_METHOD void dump() const; protected: /// Query the SMT solver and returns true if two sorts are equal (same kind @@ -140,9 +136,7 @@ class SMTSolverStatistics { virtual void print(raw_ostream &OS) const = 0; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + LLVM_ABI LLVM_DUMP_METHOD void dump() const; }; /// Shared pointer for SMTExprs, used by SMTSolver API. 
@@ -158,9 +152,7 @@ class SMTSolver { SMTSolver() = default; virtual ~SMTSolver() = default; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const; -#endif + LLVM_ABI LLVM_DUMP_METHOD void dump() const; // Returns an appropriate floating-point sort for the given bitwidth. SMTSortRef getFloatSort(unsigned BitWidth) { diff --git a/llvm/include/llvm/Support/ScaledNumber.h b/llvm/include/llvm/Support/ScaledNumber.h index 3d38677f0eb61..87a56809976a3 100644 --- a/llvm/include/llvm/Support/ScaledNumber.h +++ b/llvm/include/llvm/Support/ScaledNumber.h @@ -424,10 +424,7 @@ class ScaledNumberBase { public: static constexpr int DefaultPrecision = 10; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD static void dump(uint64_t D, int16_t E, int Width); -#endif - + LLVM_ABI static void dump(uint64_t D, int16_t E, int Width); LLVM_ABI static raw_ostream &print(raw_ostream &OS, uint64_t D, int16_t E, int Width, unsigned Precision); LLVM_ABI static std::string toString(uint64_t D, int16_t E, int Width, @@ -610,12 +607,7 @@ template class ScaledNumber : ScaledNumberBase { unsigned Precision = DefaultPrecision) const { return ScaledNumberBase::print(OS, Digits, Scale, Width, Precision); } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - LLVM_DUMP_METHOD void dump() const { - return ScaledNumberBase::dump(Digits, Scale, Width); - } -#endif + void dump() const { return ScaledNumberBase::dump(Digits, Scale, Width); } ScaledNumber &operator+=(const ScaledNumber &X) { std::tie(Digits, Scale) = diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index ca2030abdc1f5..4f4c097c7162a 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -132,14 +132,16 @@ class SpecialCaseList { using SectionEntries = StringMap>; struct Section { - Section(std::unique_ptr M) : SectionMatcher(std::move(M)){}; - Section() : 
Section(std::make_unique()) {} + Section(std::unique_ptr M, StringRef str) + : SectionMatcher(std::move(M)), SectionStr(str) {}; + Section(StringRef str) : Section(std::make_unique(), str) {}; std::unique_ptr SectionMatcher; SectionEntries Entries; + std::string SectionStr; }; - StringMap
Sections; + std::vector
Sections; LLVM_ABI Expected
addSection(StringRef SectionStr, unsigned LineNo, bool UseGlobs = true); @@ -154,6 +156,6 @@ class SpecialCaseList { StringRef Category) const; }; -} // namespace llvm +} // namespace llvm #endif // LLVM_SUPPORT_SPECIALCASELIST_H diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h index 234979eebc881..e7f712451d482 100644 --- a/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -71,10 +71,6 @@ class DirectiveLanguage { return Records.getAllDerivedDefinitions("Category"); } - ArrayRef getSourceLanguages() const { - return Records.getAllDerivedDefinitions("SourceLanguage"); - } - ArrayRef getDirectives() const { return Records.getAllDerivedDefinitions("Directive"); } @@ -113,15 +109,13 @@ class BaseRecord { // Returns the name of the directive formatted for output. Whitespace are // replaced with underscores. - static std::string getFormattedName(const Record *R) { - StringRef Name = R->getValueAsString("name"); + std::string getFormattedName() const { + StringRef Name = Def->getValueAsString("name"); std::string N = Name.str(); llvm::replace(N, ' ', '_'); return N; } - std::string getFormattedName() const { return getFormattedName(Def); } - bool isDefault() const { return Def->getValueAsBit("isDefault"); } // Returns the record name. @@ -163,10 +157,6 @@ class Directive : public BaseRecord { const Record *getCategory() const { return Def->getValueAsDef("category"); } - std::vector getSourceLanguages() const { - return Def->getValueAsListOfDefs("languages"); - } - // Clang uses a different format for names of its directives enum. 
std::string getClangAccSpelling() const { std::string Name = Def->getValueAsString("name").str(); diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 406baa4f5fdaa..41fed692c7025 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -860,12 +860,6 @@ def find_last_active : SDNode<"ISD::VECTOR_FIND_LAST_ACTIVE", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>, []>; -def get_active_lane_mask - : SDNode< - "ISD::GET_ACTIVE_LANE_MASK", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>, - []>; - // Nodes for intrinsics, you should use the intrinsic itself and let tblgen use // these internally. Don't reference these directly. def intrinsic_void : SDNode<"ISD::INTRINSIC_VOID", @@ -881,7 +875,6 @@ def SDT_assert : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>; def assertsext : SDNode<"ISD::AssertSext", SDT_assert>; def assertzext : SDNode<"ISD::AssertZext", SDT_assert>; -def assertnofpclass : SDNode<"ISD::AssertNoFPClass", SDTFPUnaryOp>; def assertalign : SDNode<"ISD::AssertAlign", SDT_assert>; def convergencectrl_anchor : SDNode<"ISD::CONVERGENCECTRL_ANCHOR", diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h index dd59a9c766e45..0b78700ca71bb 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h +++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -21,18 +21,11 @@ namespace llvm { bool canPeel(const Loop *L); -/// Returns true if the last iteration of \p L can be peeled off. It makes sure -/// the loop exit condition can be adjusted when peeling and that the loop -/// executes at least 2 iterations. -bool canPeelLastIteration(const Loop &L, ScalarEvolution &SE); - /// VMap is the value-map that maps instructions from the original loop to -/// instructions in the last peeled-off iteration. 
If \p PeelLast is true, peel -/// off the last \p PeelCount iterations from \p L (canPeelLastIteration must be -/// true for \p L), otherwise peel off the first \p PeelCount iterations. -bool peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, - ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, - bool PreserveLCSSA, ValueToValueMapTy &VMap); +/// instructions in the last peeled-off iteration. +bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, + ValueToValueMapTy &VMap); TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/include/llvm/Transforms/Vectorize/EVLIndVarSimplify.h b/llvm/include/llvm/Transforms/Vectorize/EVLIndVarSimplify.h deleted file mode 100644 index 3178dc762a195..0000000000000 --- a/llvm/include/llvm/Transforms/Vectorize/EVLIndVarSimplify.h +++ /dev/null @@ -1,31 +0,0 @@ -//===------ EVLIndVarSimplify.h - Optimize vectorized loops w/ EVL IV------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass optimizes a vectorized loop with canonical IV to using EVL-based -// IV if it was tail-folded by predicated EVL. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_VECTORIZE_EVLINDVARSIMPLIFY_H -#define LLVM_TRANSFORMS_VECTORIZE_EVLINDVARSIMPLIFY_H - -#include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/IR/PassManager.h" - -namespace llvm { -class Loop; -class LPMUpdater; - -/// Turn vectorized loops with canonical induction variables into loops that -/// only use a single EVL-based induction variable. 
-struct EVLIndVarSimplifyPass : public PassInfoMixin { - PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM, - LoopStandardAnalysisResults &AR, LPMUpdater &U); -}; -} // namespace llvm -#endif diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index 2afabb75c7cc5..27bd179a58ede 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -693,8 +693,8 @@ AnalysisKey AAManager::Key; ExternalAAWrapperPass::ExternalAAWrapperPass() : ImmutablePass(ID) {} -ExternalAAWrapperPass::ExternalAAWrapperPass(CallbackT CB, bool RunEarly) - : ImmutablePass(ID), CB(std::move(CB)), RunEarly(RunEarly) {} +ExternalAAWrapperPass::ExternalAAWrapperPass(CallbackT CB) + : ImmutablePass(ID), CB(std::move(CB)) {} char ExternalAAWrapperPass::ID = 0; @@ -741,7 +741,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { // Add any target-specific alias analyses that should be run early. auto *ExtWrapperPass = getAnalysisIfAvailable(); - if (ExtWrapperPass && ExtWrapperPass->RunEarly && ExtWrapperPass->CB) { + if (ExtWrapperPass && ExtWrapperPass->runEarly() && ExtWrapperPass->CB) { LLVM_DEBUG(dbgs() << "AAResults register Early ExternalAA: " << ExtWrapperPass->getPassName() << "\n"); ExtWrapperPass->CB(*this, F, *AAR); @@ -777,7 +777,7 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { // If available, run an external AA providing callback over the results as // well. 
- if (ExtWrapperPass && !ExtWrapperPass->RunEarly && ExtWrapperPass->CB) { + if (ExtWrapperPass && !ExtWrapperPass->runEarly() && ExtWrapperPass->CB) { LLVM_DEBUG(dbgs() << "AAResults register Late ExternalAA: " << ExtWrapperPass->getPassName() << "\n"); ExtWrapperPass->CB(*this, F, *AAR); diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index ab407e945bc53..af1a3c593c514 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1541,11 +1541,11 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, return std::nullopt; } -std::optional llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, - Type *ElemTyB, Value *PtrB, - const DataLayout &DL, - ScalarEvolution &SE, - bool StrictCheck, bool CheckType) { +std::optional llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, + Type *ElemTyB, Value *PtrB, + const DataLayout &DL, + ScalarEvolution &SE, bool StrictCheck, + bool CheckType) { assert(PtrA && PtrB && "Expected non-nullptr pointers."); // Make sure that A and B are different pointers. @@ -1570,7 +1570,7 @@ std::optional llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets( DL, OffsetB, /*AllowNonInbounds=*/true); - std::optional Val; + int Val; if (PtrA1 == PtrB1) { // Retrieve the address space again as pointer stripping now tracks through // `addrspacecast`. @@ -1585,7 +1585,7 @@ std::optional llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, OffsetB = OffsetB.sextOrTrunc(IdxWidth); OffsetB -= OffsetA; - Val = OffsetB.trySExtValue(); + Val = OffsetB.getSExtValue(); } else { // Otherwise compute the distance with SCEV between the base pointers. 
const SCEV *PtrSCEVA = SE.getSCEV(PtrA); @@ -1594,14 +1594,10 @@ std::optional llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, SE.computeConstantDifference(PtrSCEVB, PtrSCEVA); if (!Diff) return std::nullopt; - Val = Diff->trySExtValue(); + Val = Diff->getSExtValue(); } - - if (!Val) - return std::nullopt; - - int64_t Size = DL.getTypeStoreSize(ElemTyA); - int64_t Dist = *Val / Size; + int Size = DL.getTypeStoreSize(ElemTyA); + int Dist = Val / Size; // Ensure that the calculated distance matches the type-based one after all // the bitcasts removal in the provided pointers. @@ -1620,15 +1616,14 @@ bool llvm::sortPtrAccesses(ArrayRef VL, Type *ElemTy, // first pointer in the array. Value *Ptr0 = VL[0]; - using DistOrdPair = std::pair; + using DistOrdPair = std::pair; auto Compare = llvm::less_first(); std::set Offsets(Compare); Offsets.emplace(0, 0); bool IsConsecutive = true; for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) { - std::optional Diff = - getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE, - /*StrictCheck=*/true); + std::optional Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE, + /*StrictCheck=*/true); if (!Diff) return false; @@ -1659,7 +1654,7 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, return false; Type *ElemTyA = getLoadStoreType(A); Type *ElemTyB = getLoadStoreType(B); - std::optional Diff = + std::optional Diff = getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE, /*StrictCheck=*/true, CheckType); return Diff && *Diff == 1; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 8405678aa9680..3d403531cea2f 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -7786,7 +7786,10 @@ static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind, case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: + return false; case Instruction::GetElementPtr: + // inbounds is handled above + // TODO: what about 
inrange on constexpr? return false; default: { const auto *CE = dyn_cast(Op); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 76f27623c8656..2c53a9c27ccb2 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5771,35 +5771,6 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } -// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst -// is the first instruction that will use Addr. So we need to find the first -// user of Addr in current BB. -static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, - Value *SunkAddr) { - if (Addr->hasOneUse()) - return MemoryInst->getIterator(); - - // We already have a SunkAddr in current BB, but we may need to insert cast - // instruction after it. - if (SunkAddr) { - if (Instruction *AddrInst = dyn_cast(SunkAddr)) - return std::next(AddrInst->getIterator()); - } - - // Find the first user of Addr in current BB. - Instruction *Earliest = MemoryInst; - for (User *U : Addr->users()) { - Instruction *UserInst = dyn_cast(U); - if (UserInst && UserInst->getParent() == MemoryInst->getParent()) { - if (isa(UserInst) || UserInst->isDebugOrPseudoInst()) - continue; - if (UserInst->comesBefore(Earliest)) - Earliest = UserInst; - } - } - return Earliest->getIterator(); -} - /// Sink addressing mode computation immediate before MemoryInst if doing so /// can be done without increasing register pressure. The need for the /// register pressure constraint means this can end up being an all or nothing @@ -5924,6 +5895,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return Modified; } + // Insert this computation right after this user. Since our caller is + // scanning from the top of the BB to the bottom, reuse of the expr are + // guaranteed to happen later. 
+ IRBuilder<> Builder(MemoryInst); + // Now that we determined the addressing expression we want to use and know // that we have to sink it into this block. Check to see if we have already // done this for some other load/store instr in this block. If so, reuse @@ -5934,13 +5910,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); - - // The current BB may be optimized multiple times, we can't guarantee the - // reuse of Addr happens later, call findInsertPos to find an appropriate - // insert position. - IRBuilder<> Builder(MemoryInst->getParent(), - findInsertPos(Addr, MemoryInst, SunkAddr)); - if (SunkAddr) { LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8671efcfd2fb1..d6e288a59b2ee 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10972,22 +10972,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1); } - // fold (srl (logic_op x, (shl (zext y), c1)), c1) - // -> (logic_op (srl x, c1), (zext y)) - // c1 <= leadingzeros(zext(y)) - SDValue X, ZExtY; - if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic( - m_Value(X), - m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY), - m_Opc(ISD::ZERO_EXTEND)), - m_Specific(N1))))))) { - unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() - - ZExtY.getOperand(0).getScalarValueSizeInBits(); - if (N1C->getZExtValue() <= NumLeadingZeros) - return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, - DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY); - } - // fold operands of srl based on knowledge that the low bits are not // demanded. 
if (SimplifyDemandedBits(SDValue(N, 0))) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 08dce6a2fc9e5..432209e8ecb0a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -168,7 +168,6 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::POISON: case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; - case ISD::AssertNoFPClass: R = GetSoftenedFloat(N->getOperand(0)); break; case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_FMIN: @@ -2583,7 +2582,6 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::LLROUND: case ISD::LRINT: case ISD::LLRINT: R = PromoteFloatOp_UnaryOp(N, OpNo); break; - case ISD::AssertNoFPClass: R = PromoteFloatOp_AssertNoFPClass(N, OpNo); break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; @@ -2642,12 +2640,6 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo) { return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); } -// Convert the promoted float value to the desired integer type -SDValue DAGTypeLegalizer::PromoteFloatOp_AssertNoFPClass(SDNode *N, - unsigned OpNo) { - return GetPromotedFloat(N->getOperand(0)); -} - SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo) { SDValue Op = GetPromotedFloat(N->getOperand(0)); @@ -2812,9 +2804,6 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FTAN: case ISD::FTANH: case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; - case ISD::AssertNoFPClass: - R = PromoteFloatRes_AssertNoFPClass(N); - break; // Binary FP Operations case ISD::FADD: @@ -3007,14 +2996,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) { EVT VT = N->getValueType(0); 
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op = GetPromotedFloat(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op); -} -// Unary operation with a more non-float operand where the result and the -// operand have PromoteFloat type action. Construct a new SDNode with the -// promoted float value of the old operand. -SDValue DAGTypeLegalizer::PromoteFloatRes_AssertNoFPClass(SDNode *N) { - return GetPromotedFloat(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op); } // Binary operations where the result and both operands have PromoteFloat type @@ -3298,9 +3281,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FTAN: case ISD::FTANH: case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; - case ISD::AssertNoFPClass: - R = SoftPromoteHalfRes_AssertNoFPClass(N); - break; // Binary FP Operations case ISD::FADD: @@ -3627,10 +3607,6 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } -SDValue DAGTypeLegalizer::SoftPromoteHalfRes_AssertNoFPClass(SDNode *N) { - return GetSoftPromotedHalf(N->getOperand(0)); -} - SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9ccd6a4d1684c..25e74a2ae5b71 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -160,10 +160,6 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(N); break; - case ISD::GET_ACTIVE_LANE_MASK: - Res = PromoteIntRes_GET_ACTIVE_LANE_MASK(N); - break; - case ISD::PARTIAL_REDUCE_UMLA: case ISD::PARTIAL_REDUCE_SMLA: Res = 
PromoteIntRes_PARTIAL_REDUCE_MLA(N); @@ -6226,12 +6222,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N) { return DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, SDLoc(N), NVT, N->ops()); } -SDValue DAGTypeLegalizer::PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N) { - EVT VT = N->getValueType(0); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, SDLoc(N), NVT, N->ops()); -} - SDValue DAGTypeLegalizer::PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N) { SDLoc DL(N); EVT VT = N->getValueType(0); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cf3a9e23f4878..720393158aa5e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -379,7 +379,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); SDValue PromoteIntRes_PATCHPOINT(SDNode *N); SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N); - SDValue PromoteIntRes_GET_ACTIVE_LANE_MASK(SDNode *N); SDValue PromoteIntRes_PARTIAL_REDUCE_MLA(SDNode *N); // Integer Operand Promotion. 
@@ -773,7 +772,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatRes_SELECT(SDNode *N); SDValue PromoteFloatRes_SELECT_CC(SDNode *N); SDValue PromoteFloatRes_UnaryOp(SDNode *N); - SDValue PromoteFloatRes_AssertNoFPClass(SDNode *N); SDValue PromoteFloatRes_UNDEF(SDNode *N); SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N); SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); @@ -787,7 +785,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STRICT_FP_EXTEND(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); - SDValue PromoteFloatOp_AssertNoFPClass(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_ATOMIC_STORE(SDNode *N, unsigned OpNo); @@ -823,7 +820,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfRes_SELECT(SDNode *N); SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N); SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); - SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N); SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0c0e700f6abca..ee31baac7b321 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -61,7 +61,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: case ISD::AssertSext: case ISD::FPOWI: - case ISD::AssertNoFPClass: R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; @@ -1277,7 +1276,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { 
case ISD::UINT_TO_FP: case ISD::VP_UINT_TO_FP: case ISD::FCANONICALIZE: - case ISD::AssertNoFPClass: SplitVecRes_UnaryOp(N, Lo, Hi); break; case ISD::ADDRSPACECAST: @@ -2616,7 +2614,7 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); if (N->getNumOperands() <= 2) { - if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass) { + if (Opcode == ISD::FP_ROUND) { Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags); Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags); } else { @@ -4874,7 +4872,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FREEZE: case ISD::ARITH_FENCE: case ISD::FCANONICALIZE: - case ISD::AssertNoFPClass: Res = WidenVecRes_Unary(N); break; case ISD::FMA: case ISD::VP_FMA: @@ -5619,9 +5616,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); if (N->getNumOperands() == 1) return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags()); - if (N->getOpcode() == ISD::AssertNoFPClass) - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, - N->getOperand(1), N->getFlags()); assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); assert(N->isVPOpcode() && "Expected VP opcode"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5d640c39a56d5..bbf1b0fd590ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5831,15 +5831,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, return false; return true; } - case ISD::AssertNoFPClass: { - FPClassTest NoFPClass = - static_cast(Op.getConstantOperandVal(1)); - if ((NoFPClass & fcNan) == fcNan) - return true; - if (SNaN && (NoFPClass & fcSNan) == fcSNan) - return true; - return isKnownNeverNaN(Op.getOperand(0), 
DemandedElts, SNaN, Depth + 1); - } default: if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) { @@ -7499,17 +7490,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N2.getOpcode() == ISD::TargetConstant && "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; - case ISD::AssertNoFPClass: { - assert(N1.getValueType().isFloatingPoint() && - "AssertNoFPClass is used for a non-floating type"); - assert(isa(N2) && "NoFPClass is not Constant"); - FPClassTest NoFPClass = static_cast(N2->getAsZExtVal()); - assert(llvm::to_underlying(NoFPClass) <= - BitmaskEnumDetail::Mask() && - "FPClassTest value too large"); - (void)NoFPClass; - break; - } case ISD::AssertSext: case ISD::AssertZext: { EVT EVT = cast(N2)->getVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3ebd3a4b88097..8e74a076cc013 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7987,15 +7987,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::get_active_lane_mask: { EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); - SDValue TripCount = getValue(I.getOperand(1)); EVT ElementVT = Index.getValueType(); if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) { - setValue(&I, DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, sdl, CCVT, Index, - TripCount)); + visitTargetIntrinsic(I, Intrinsic); return; } + SDValue TripCount = getValue(I.getOperand(1)); EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElementVT, CCVT.getVectorElementCount()); @@ -11804,18 +11803,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { else if (Arg.hasAttribute(Attribute::ZExt)) AssertOp = ISD::AssertZext; - SDValue OutVal = - 
getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, nullptr, - NewRoot, F.getCallingConv(), AssertOp); - - FPClassTest NoFPClass = Arg.getNoFPClass(); - if (NoFPClass != fcNone) { - SDValue SDNoFPClass = DAG.getTargetConstant( - static_cast(NoFPClass), dl, MVT::i32); - OutVal = DAG.getNode(ISD::AssertNoFPClass, dl, OutVal.getValueType(), - OutVal, SDNoFPClass); - } - ArgValues.push_back(OutVal); + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, + PartVT, VT, nullptr, NewRoot, + F.getCallingConv(), AssertOp)); } i += NumParts; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 803894e298dd5..8faf97271d99e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -124,7 +124,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; case ISD::AssertZext: return "AssertZext"; - case ISD::AssertNoFPClass: return "AssertNoFPClass"; case ISD::AssertAlign: return "AssertAlign"; case ISD::BasicBlock: return "BasicBlock"; @@ -577,9 +576,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VECTOR_FIND_LAST_ACTIVE: return "find_last_active"; - case ISD::GET_ACTIVE_LANE_MASK: - return "get_active_lane_mask"; - case ISD::PARTIAL_REDUCE_UMLA: return "partial_reduce_umla"; case ISD::PARTIAL_REDUCE_SMLA: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 586728a44571e..1bc30336a02bf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -3264,7 +3264,6 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, return; case ISD::AssertSext: case ISD::AssertZext: - case ISD::AssertNoFPClass: case ISD::AssertAlign: 
ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); CurDAG->RemoveDeadNode(NodeToMatch); diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp index 37bc60d4045c7..42da957233667 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp @@ -913,7 +913,7 @@ Error LVDWARFReader::createScopes() { LT->getFileNameByIndex( 1, None, DILineInfoSpecifier::FileLineInfoKind::RawValue, FileOne); - return FileZero != FileOne; + return FileZero.compare(FileOne); } } diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 369a047f65076..90194d7fcc119 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -487,7 +487,7 @@ MachOPlatform::MachOPlatform( if ((Err = ES.getBootstrapMapValue("darwin-use-ehframes-only", ForceEHFrames))) return; - this->ForceEHFrames = ForceEHFrames.value_or(false); + this->ForceEHFrames = ForceEHFrames.has_value() ? *ForceEHFrames : false; } BootstrapInfo BI; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 41aa06add6aba..8717cd092b0b5 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -646,8 +646,8 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, if (Name == "thread.pointer") { // '(arm|aarch64).thread.pointer'. 
- NewFn = Intrinsic::getOrInsertDeclaration( - F->getParent(), Intrinsic::thread_pointer, F->getReturnType()); + NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), + Intrinsic::thread_pointer); return true; } @@ -1475,14 +1475,6 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, } break; - case 't': - if (Name == "thread.pointer") { - NewFn = Intrinsic::getOrInsertDeclaration( - F->getParent(), Intrinsic::thread_pointer, F->getReturnType()); - return true; - } - break; - case 'v': { if (Name == "var.annotation" && F->arg_size() == 4) { rename(F); @@ -1613,7 +1605,7 @@ GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) { auto Ctor = cast(Init->getOperand(i)); NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1), - ConstantPointerNull::get(IRB.getPtrTy())); + Constant::getNullValue(IRB.getPtrTy())); } Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors); @@ -4721,10 +4713,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { } // Create a new call with an added null annotation attribute argument. - NewCall = Builder.CreateCall( - NewFn, - {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())}); + NewCall = + Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(3), + Constant::getNullValue(Builder.getPtrTy())}); NewCall->takeName(CI); CI->replaceAllUsesWith(NewCall); CI->eraseFromParent(); @@ -4737,10 +4729,10 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { return; } // Create a new call with an added null annotation attribute argument. 
- NewCall = Builder.CreateCall( - NewFn, - {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), - CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())}); + NewCall = + Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(3), + Constant::getNullValue(Builder.getPtrTy())}); NewCall->takeName(CI); CI->replaceAllUsesWith(NewCall); CI->eraseFromParent(); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 580b0af709337..8adb85ec6091a 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -169,7 +169,8 @@ Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) { CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size, MaybeAlign Align, bool isVolatile, - const AAMDNodes &AAInfo) { + MDNode *TBAATag, MDNode *ScopeTag, + MDNode *NoAliasTag) { Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)}; Type *Tys[] = {Ptr->getType(), Size->getType()}; @@ -177,14 +178,25 @@ CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size, if (Align) cast(CI)->setDestAlignment(*Align); - CI->setAAMetadata(AAInfo); + + // Set the TBAA info if present. 
+ if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + return CI; } CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, Value *Val, Value *Size, - bool IsVolatile, - const AAMDNodes &AAInfo) { + bool IsVolatile, MDNode *TBAATag, + MDNode *ScopeTag, + MDNode *NoAliasTag) { Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)}; Type *Tys[] = {Dst->getType(), Size->getType()}; @@ -192,13 +204,23 @@ CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, if (DstAlign) cast(CI)->setDestAlignment(*DstAlign); - CI->setAAMetadata(AAInfo); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + return CI; } CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet( Value *Ptr, Value *Val, Value *Size, Align Alignment, uint32_t ElementSize, - const AAMDNodes &AAInfo) { + MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { Value *Ops[] = {Ptr, Val, Size, getInt32(ElementSize)}; Type *Tys[] = {Ptr->getType(), Size->getType()}; @@ -207,15 +229,24 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet( CreateIntrinsic(Intrinsic::memset_element_unordered_atomic, Tys, Ops); cast(CI)->setDestAlignment(Alignment); - CI->setAAMetadata(AAInfo); + + // Set the TBAA info if present. 
+ if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + return CI; } -CallInst *IRBuilderBase::CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, - MaybeAlign DstAlign, Value *Src, - MaybeAlign SrcAlign, Value *Size, - bool isVolatile, - const AAMDNodes &AAInfo) { +CallInst *IRBuilderBase::CreateMemTransferInst( + Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, + MaybeAlign SrcAlign, Value *Size, bool isVolatile, MDNode *TBAATag, + MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) { assert((IntrID == Intrinsic::memcpy || IntrID == Intrinsic::memcpy_inline || IntrID == Intrinsic::memmove) && "Unexpected intrinsic ID"); @@ -229,13 +260,28 @@ CallInst *IRBuilderBase::CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MCI->setDestAlignment(*DstAlign); if (SrcAlign) MCI->setSourceAlignment(*SrcAlign); - MCI->setAAMetadata(AAInfo); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. 
+ if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + return CI; } CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy( Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size, - uint32_t ElementSize, const AAMDNodes &AAInfo) { + uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag, + MDNode *ScopeTag, MDNode *NoAliasTag) { assert(DstAlign >= ElementSize && "Pointer alignment must be at least element size"); assert(SrcAlign >= ElementSize && @@ -250,7 +296,21 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy( auto *AMCI = cast(CI); AMCI->setDestAlignment(DstAlign); AMCI->setSourceAlignment(SrcAlign); - AMCI->setAAMetadata(AAInfo); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. + if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + return CI; } @@ -334,7 +394,8 @@ CallInst *IRBuilderBase::CreateFree(Value *Source, CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove( Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size, - uint32_t ElementSize, const AAMDNodes &AAInfo) { + uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag, + MDNode *ScopeTag, MDNode *NoAliasTag) { assert(DstAlign >= ElementSize && "Pointer alignment must be at least element size"); assert(SrcAlign >= ElementSize && @@ -348,7 +409,21 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove( // Set the alignment of the pointer args. 
CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign)); CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign)); - CI->setAAMetadata(AAInfo); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. + if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + return CI; } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 73b4274a41ee6..7979e197a8de6 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6553,14 +6553,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { &Call); break; } - case Intrinsic::thread_pointer: { - Check(Call.getType()->getPointerAddressSpace() == - DL.getDefaultGlobalsAddressSpace(), - "llvm.thread.pointer intrinsic return type must be for the globals " - "address space", - &Call); - break; - } case Intrinsic::threadlocal_address: { const Value &Arg0 = *Call.getArgOperand(0); Check(isa(Arg0), diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index b7db70b99bcbc..8a85ac835000a 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -439,33 +439,27 @@ static void codegen(const Config &Conf, TargetMachine *TM, std::unique_ptr &Stream = *StreamOrErr; TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; - // Create the codegen pipeline in its own scope so it gets deleted before - // Stream->commit() is called. The commit function of CacheStream deletes - // the raw stream, which is too early as streamers (e.g. MCAsmStreamer) - // keep the pointer and may use it until their destruction. See #138194. 
- { - legacy::PassManager CodeGenPasses; - TargetLibraryInfoImpl TLII(Mod.getTargetTriple()); - CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); - // No need to make index available if the module is empty. - // In theory these passes should not use the index for an empty - // module, however, this guards against doing any unnecessary summary-based - // analysis in the case of a ThinLTO build where this might be an empty - // regular LTO combined module, with a large combined index from ThinLTO. - if (!isEmptyModule(Mod)) - CodeGenPasses.add( - createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); - if (Conf.PreCodeGenPassesHook) - Conf.PreCodeGenPassesHook(CodeGenPasses); - if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, - DwoOut ? &DwoOut->os() : nullptr, - Conf.CGFileType)) - report_fatal_error("Failed to setup codegen"); - CodeGenPasses.run(Mod); - - if (DwoOut) - DwoOut->keep(); - } + legacy::PassManager CodeGenPasses; + TargetLibraryInfoImpl TLII(Mod.getTargetTriple()); + CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); + // No need to make index available if the module is empty. + // In theory these passes should not use the index for an empty + // module, however, this guards against doing any unnecessary summary-based + // analysis in the case of a ThinLTO build where this might be an empty + // regular LTO combined module, with a large combined index from ThinLTO. + if (!isEmptyModule(Mod)) + CodeGenPasses.add( + createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); + if (Conf.PreCodeGenPassesHook) + Conf.PreCodeGenPassesHook(CodeGenPasses); + if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, + DwoOut ? 
&DwoOut->os() : nullptr, + Conf.CGFileType)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(Mod); + + if (DwoOut) + DwoOut->keep(); if (Error Err = Stream->commit()) report_fatal_error(std::move(Err)); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 56e91703cb019..8e69683c1d4ce 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -371,7 +371,6 @@ #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Utils/UnifyLoopExits.h" -#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" #include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index f3654600c5abb..5a85b308925a6 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -142,7 +142,6 @@ #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" -#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 94dabe290213d..ea792280ed975 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -673,7 +673,6 @@ LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch", #endif LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass()) LOOP_PASS("dot-ddg", DDGDotPrinterPass()) -LOOP_PASS("evl-iv-simplify", EVLIndVarSimplifyPass()) LOOP_PASS("guard-widening", GuardWideningPass()) LOOP_PASS("extra-simple-loop-unswitch-passes", 
ExtraLoopPassManager()) diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 9dc1a0d0b4678..2759346935b14 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -152,7 +152,9 @@ void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) { InfoObj->ValueProfDataEndianness = Endianness; } -void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; } +void InstrProfWriter::setOutputSparse(bool Sparse) { + this->Sparse = Sparse; +} void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, function_ref Warn) { diff --git a/llvm/lib/Support/APFixedPoint.cpp b/llvm/lib/Support/APFixedPoint.cpp index 9a7caa4112625..f395919287b72 100644 --- a/llvm/lib/Support/APFixedPoint.cpp +++ b/llvm/lib/Support/APFixedPoint.cpp @@ -439,10 +439,7 @@ void APFixedPoint::print(raw_ostream &OS) const { Sema.print(OS); OS << "})"; } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void APFixedPoint::dump() const { print(llvm::errs()); } -#endif APFixedPoint APFixedPoint::negate(bool *Overflow) const { if (!isSaturated()) { diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp index 76ead00c977bd..4952fe65d7767 100644 --- a/llvm/lib/Support/ConvertUTFWrapper.cpp +++ b/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -303,15 +303,5 @@ bool convertWideToUTF8(const std::wstring &Source, std::string &Result) { } } -bool IsSingleCodeUnitUTF8Codepoint(unsigned V) { return V <= 0x7F; } - -bool IsSingleCodeUnitUTF16Codepoint(unsigned V) { - return V <= 0xD7FF || (V >= 0xE000 && V <= 0xFFFF); -} - -bool IsSingleCodeUnitUTF32Codepoint(unsigned V) { - return V <= 0xD7FF || (V >= 0xE000 && V <= 0x10FFFF); -} - } // end namespace llvm diff --git a/llvm/lib/Support/DebugCounter.cpp b/llvm/lib/Support/DebugCounter.cpp index 9c4a4429ca0ee..a6de07a55482a 100644 --- a/llvm/lib/Support/DebugCounter.cpp +++ 
b/llvm/lib/Support/DebugCounter.cpp @@ -248,8 +248,6 @@ bool DebugCounter::shouldExecuteImpl(unsigned CounterName) { return true; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void DebugCounter::dump() const { print(dbgs()); } -#endif diff --git a/llvm/lib/Support/DynamicAPInt.cpp b/llvm/lib/Support/DynamicAPInt.cpp index 9def5c782af4c..bfcb97e0cc96a 100644 --- a/llvm/lib/Support/DynamicAPInt.cpp +++ b/llvm/lib/Support/DynamicAPInt.cpp @@ -32,6 +32,4 @@ raw_ostream &DynamicAPInt::print(raw_ostream &OS) const { return OS << ValLarge; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void DynamicAPInt::dump() const { print(dbgs()); } -#endif +void DynamicAPInt::dump() const { print(dbgs()); } diff --git a/llvm/lib/Support/FileOutputBuffer.cpp b/llvm/lib/Support/FileOutputBuffer.cpp index a2396d7629488..58a06a34e8cf3 100644 --- a/llvm/lib/Support/FileOutputBuffer.cpp +++ b/llvm/lib/Support/FileOutputBuffer.cpp @@ -186,7 +186,7 @@ FileOutputBuffer::create(StringRef Path, size_t Size, unsigned Flags) { case fs::file_type::regular_file: case fs::file_type::file_not_found: case fs::file_type::status_error: - if (Flags & F_mmap) + if (Flags & F_no_mmap) return createInMemoryBuffer(Path, Size, Mode); else return createOnDiskBuffer(Path, Size, Mode); diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 94a04ab90987a..16229598b612a 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -1152,10 +1152,7 @@ void KnownBits::print(raw_ostream &OS) const { OS << "?"; } } - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void KnownBits::dump() const { +void KnownBits::dump() const { print(dbgs()); dbgs() << "\n"; } -#endif diff --git a/llvm/lib/Support/ScaledNumber.cpp b/llvm/lib/Support/ScaledNumber.cpp index 33e8cc3030873..85d7afbea5c69 100644 --- a/llvm/lib/Support/ScaledNumber.cpp +++ b/llvm/lib/Support/ScaledNumber.cpp @@ -317,9 +317,7 @@ raw_ostream 
&ScaledNumberBase::print(raw_ostream &OS, uint64_t D, int16_t E, return OS << toString(D, E, Width, Precision); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void ScaledNumberBase::dump(uint64_t D, int16_t E, int Width) { +void ScaledNumberBase::dump(uint64_t D, int16_t E, int Width) { print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E << "]"; } -#endif diff --git a/llvm/lib/Support/SlowDynamicAPInt.cpp b/llvm/lib/Support/SlowDynamicAPInt.cpp index a57fec2f824e1..8b4030ddf9fc4 100644 --- a/llvm/lib/Support/SlowDynamicAPInt.cpp +++ b/llvm/lib/Support/SlowDynamicAPInt.cpp @@ -283,6 +283,4 @@ SlowDynamicAPInt &SlowDynamicAPInt::operator--() { /// --------------------------------------------------------------------------- void SlowDynamicAPInt::print(raw_ostream &OS) const { OS << Val; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void SlowDynamicAPInt::dump() const { print(dbgs()); } -#endif +void SlowDynamicAPInt::dump() const { print(dbgs()); } diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 7a23421eaeb89..76c705c097aaa 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -132,14 +132,16 @@ bool SpecialCaseList::createInternal(const MemoryBuffer *MB, Expected SpecialCaseList::addSection(StringRef SectionStr, unsigned LineNo, bool UseGlobs) { - auto [It, DidEmplace] = Sections.try_emplace(SectionStr); - auto &Section = It->getValue(); - if (DidEmplace) - if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs)) - return createStringError(errc::invalid_argument, - "malformed section at line " + Twine(LineNo) + - ": '" + SectionStr + - "': " + toString(std::move(Err))); + Sections.emplace_back(SectionStr); + auto &Section = Sections.back(); + + if (auto Err = Section.SectionMatcher->insert(SectionStr, LineNo, UseGlobs)) { + return createStringError(errc::invalid_argument, + "malformed section 
at line " + Twine(LineNo) + + ": '" + SectionStr + + "': " + toString(std::move(Err))); + } + return &Section; } @@ -213,9 +215,8 @@ unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { for (const auto &It : Sections) { - const auto &S = It.getValue(); - if (S.SectionMatcher->match(Section)) { - unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category); + if (It.SectionMatcher->match(Section)) { + unsigned Blame = inSectionBlame(It.Entries, Prefix, Query, Category); if (Blame) return Blame; } @@ -227,9 +228,11 @@ unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries, StringRef Prefix, StringRef Query, StringRef Category) const { SectionEntries::const_iterator I = Entries.find(Prefix); - if (I == Entries.end()) return 0; + if (I == Entries.end()) + return 0; StringMap::const_iterator II = I->second.find(Category); - if (II == I->second.end()) return 0; + if (II == I->second.end()) + return 0; return II->getValue().match(Query); } diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp index 27027093a0c6f..9aece099b0629 100644 --- a/llvm/lib/Support/Z3Solver.cpp +++ b/llvm/lib/Support/Z3Solver.cpp @@ -989,9 +989,7 @@ llvm::SMTSolverRef llvm::CreateZ3Solver() { #endif } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SMTSort::dump() const { print(llvm::errs()); } LLVM_DUMP_METHOD void SMTExpr::dump() const { print(llvm::errs()); } LLVM_DUMP_METHOD void SMTSolver::dump() const { print(llvm::errs()); } LLVM_DUMP_METHOD void SMTSolverStatistics::dump() const { print(llvm::errs()); } -#endif diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bcff151fe62e7..040662a5f11dd 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3611,9 +3611,6 @@ void 
AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned ExtraCSSpill = 0; bool HasUnpairedGPR64 = false; bool HasPairZReg = false; - BitVector UserReservedRegs = RegInfo->getUserReservedRegs(MF); - BitVector ReservedRegs = RegInfo->getReservedRegs(MF); - // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { const unsigned Reg = CSRegs[i]; @@ -3624,7 +3621,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // Don't save manually reserved registers set through +reserve-x#i, // even for callee-saved registers, as per GCC's behavior. - if (UserReservedRegs[Reg]) { + if (RegInfo->isUserReservedReg(MF, Reg)) { SavedRegs.reset(Reg); continue; } @@ -3656,7 +3653,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, AArch64::FPR128RegClass.contains(Reg, PairedReg)); if (!RegUsed) { - if (AArch64::GPR64RegClass.contains(Reg) && !ReservedRegs[Reg]) { + if (AArch64::GPR64RegClass.contains(Reg) && + !RegInfo->isReservedReg(MF, Reg)) { UnspilledCSGPR = Reg; UnspilledCSGPRPaired = PairedReg; } @@ -3678,7 +3676,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, !SavedRegs.test(PairedReg)) { SavedRegs.set(PairedReg); if (AArch64::GPR64RegClass.contains(PairedReg) && - !ReservedRegs[PairedReg]) + !RegInfo->isReservedReg(MF, PairedReg)) ExtraCSSpill = PairedReg; } // Check if there is a pair of ZRegs, so it can select PReg for spill/fill @@ -3701,7 +3699,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, AFI->setPredicateRegForFillSpill(AArch64::PN8); } - assert(!ReservedRegs[AFI->getPredicateRegForFillSpill()] && + assert(!RegInfo->isReservedReg(MF, AFI->getPredicateRegForFillSpill()) && "Predicate cannot be a reserved register"); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fb7f7d6f7537d..13fb6a32233fe 100644 --- 
a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -290,7 +290,6 @@ static bool isZeroingInactiveLanes(SDValue Op) { return false; // We guarantee i1 splat_vectors to zero the other lanes case ISD::SPLAT_VECTOR: - case ISD::GET_ACTIVE_LANE_MASK: case AArch64ISD::PTRUE: case AArch64ISD::SETCC_MERGE_ZERO: return true; @@ -1179,8 +1178,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::CTLZ); - setTargetDAGCombine(ISD::GET_ACTIVE_LANE_MASK); - setTargetDAGCombine(ISD::VECREDUCE_AND); setTargetDAGCombine(ISD::VECREDUCE_OR); setTargetDAGCombine(ISD::VECREDUCE_XOR); @@ -1496,13 +1493,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom); setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom); } - for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) { + for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) setOperationAction(ISD::VECTOR_FIND_LAST_ACTIVE, VT, Legal); - setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Legal); - } - - for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32}) - setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Custom); } if (Subtarget->isSVEorStreamingSVEAvailable()) { @@ -5739,24 +5731,21 @@ static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, DAG.getTargetConstant(Pattern, DL, MVT::i32)); } -static SDValue optimizeIncrementingWhile(SDNode *N, SelectionDAG &DAG, +static SDValue optimizeIncrementingWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsEqual) { - unsigned Op0 = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 1 : 0; - unsigned Op1 = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 
2 : 1; - - if (!isa(N->getOperand(Op0)) || - !isa(N->getOperand(Op1))) + if (!isa(Op.getOperand(1)) || + !isa(Op.getOperand(2))) return SDValue(); - SDLoc dl(N); - APInt X = N->getConstantOperandAPInt(Op0); - APInt Y = N->getConstantOperandAPInt(Op1); + SDLoc dl(Op); + APInt X = Op.getConstantOperandAPInt(1); + APInt Y = Op.getConstantOperandAPInt(2); // When the second operand is the maximum value, comparisons that include // equality can never fail and thus we can return an all active predicate. if (IsEqual) if (IsSigned ? Y.isMaxSignedValue() : Y.isMaxValue()) - return DAG.getConstant(1, dl, N->getValueType(0)); + return DAG.getConstant(1, dl, Op.getValueType()); bool Overflow; APInt NumActiveElems = @@ -5777,10 +5766,10 @@ static SDValue optimizeIncrementingWhile(SDNode *N, SelectionDAG &DAG, getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue()); unsigned MinSVEVectorSize = std::max( DAG.getSubtarget().getMinSVEVectorSizeInBits(), 128u); - unsigned ElementSize = 128 / N->getValueType(0).getVectorMinNumElements(); + unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements(); if (PredPattern != std::nullopt && NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize)) - return getPTrue(DAG, dl, N->getValueType(0), *PredPattern); + return getPTrue(DAG, dl, Op.getValueType(), *PredPattern); return SDValue(); } @@ -6232,14 +6221,17 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getNode( AArch64ISD::URSHR_I, dl, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); return SDValue(); + case Intrinsic::aarch64_sve_whilelo: + return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false, + /*IsEqual=*/false); case Intrinsic::aarch64_sve_whilelt: - return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true, + return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true, /*IsEqual=*/false); case Intrinsic::aarch64_sve_whilels: - return optimizeIncrementingWhile(Op.getNode(), DAG, 
/*IsSigned=*/false, + return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/false, /*IsEqual=*/true); case Intrinsic::aarch64_sve_whilele: - return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true, + return optimizeIncrementingWhile(Op, DAG, /*IsSigned=*/true, /*IsEqual=*/true); case Intrinsic::aarch64_sve_sunpkhi: return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(), @@ -6540,6 +6532,28 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AArch64ISD::USDOT, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } + case Intrinsic::get_active_lane_mask: { + SDValue ID = + DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64); + + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID, Op.getOperand(1), + Op.getOperand(2)); + + // We can use the SVE whilelo instruction to lower this intrinsic by + // creating the appropriate sequence of scalable vector operations and + // then extracting a fixed-width subvector from the scalable vector. 
+ + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); + EVT WhileVT = ContainerVT.changeElementType(MVT::i1); + + SDValue Mask = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WhileVT, ID, + Op.getOperand(1), Op.getOperand(2)); + SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, dl, ContainerVT, Mask); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, MaskAsInt, + DAG.getVectorIdxConstant(0, dl)); + } case Intrinsic::aarch64_neon_saddlv: case Intrinsic::aarch64_neon_uaddlv: { EVT OpVT = Op.getOperand(1).getValueType(); @@ -7678,8 +7692,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerVECTOR_DEINTERLEAVE(Op, DAG); case ISD::VECTOR_INTERLEAVE: return LowerVECTOR_INTERLEAVE(Op, DAG); - case ISD::GET_ACTIVE_LANE_MASK: - return LowerGET_ACTIVE_LANE_MASK(Op, DAG); case ISD::LRINT: case ISD::LLRINT: if (Op.getValueType().isVector()) @@ -8629,16 +8641,6 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI, } } -static SMECallAttrs -getSMECallAttrs(const Function &Caller, - const TargetLowering::CallLoweringInfo &CLI) { - if (CLI.CB) - return SMECallAttrs(*CLI.CB); - if (auto *ES = dyn_cast(CLI.Callee)) - return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol())); - return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal)); -} - bool AArch64TargetLowering::isEligibleForTailCallOptimization( const CallLoweringInfo &CLI) const { CallingConv::ID CalleeCC = CLI.CallConv; @@ -8657,10 +8659,12 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( // SME Streaming functions are not eligible for TCO as they may require // the streaming mode or ZA to be restored after returning from the call. - SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, CLI); - if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() || - CallAttrs.requiresPreservingAllZAState() || - CallAttrs.caller().hasStreamingBody()) + SMEAttrs CallerAttrs(MF.getFunction()); + auto CalleeAttrs = CLI.CB ? 
SMEAttrs(*CLI.CB) : SMEAttrs(SMEAttrs::Normal); + if (CallerAttrs.requiresSMChange(CalleeAttrs) || + CallerAttrs.requiresLazySave(CalleeAttrs) || + CallerAttrs.requiresPreservingAllZAState(CalleeAttrs) || + CallerAttrs.hasStreamingBody()) return false; // Functions using the C or Fast calling convention that have an SVE signature @@ -8952,14 +8956,14 @@ static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI, return TLI.LowerCallTo(CLI).second; } -static AArch64SME::ToggleCondition -getSMToggleCondition(const SMECallAttrs &CallAttrs) { - if (!CallAttrs.caller().hasStreamingCompatibleInterface() || - CallAttrs.caller().hasStreamingBody()) +static unsigned getSMCondition(const SMEAttrs &CallerAttrs, + const SMEAttrs &CalleeAttrs) { + if (!CallerAttrs.hasStreamingCompatibleInterface() || + CallerAttrs.hasStreamingBody()) return AArch64SME::Always; - if (CallAttrs.callee().hasNonStreamingInterface()) + if (CalleeAttrs.hasNonStreamingInterface()) return AArch64SME::IfCallerIsStreaming; - if (CallAttrs.callee().hasStreamingInterface()) + if (CalleeAttrs.hasStreamingInterface()) return AArch64SME::IfCallerIsNonStreaming; llvm_unreachable("Unsupported attributes"); @@ -9092,7 +9096,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } // Determine whether we need any streaming mode changes. 
- SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), CLI); + SMEAttrs CalleeAttrs, CallerAttrs(MF.getFunction()); + if (CLI.CB) + CalleeAttrs = SMEAttrs(*CLI.CB); + else if (auto *ES = dyn_cast(CLI.Callee)) + CalleeAttrs = SMEAttrs(ES->getSymbol()); auto DescribeCallsite = [&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & { @@ -9107,8 +9115,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, return R; }; - bool RequiresLazySave = CallAttrs.requiresLazySave(); - bool RequiresSaveAllZA = CallAttrs.requiresPreservingAllZAState(); + bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs); + bool RequiresSaveAllZA = + CallerAttrs.requiresPreservingAllZAState(CalleeAttrs); if (RequiresLazySave) { const TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj(); MachinePointerInfo MPI = @@ -9136,18 +9145,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, return DescribeCallsite(R) << " sets up a lazy save for ZA"; }); } else if (RequiresSaveAllZA) { - assert(!CallAttrs.callee().hasSharedZAInterface() && + assert(!CalleeAttrs.hasSharedZAInterface() && "Cannot share state that may not exist"); Chain = emitSMEStateSaveRestore(*this, DAG, FuncInfo, DL, Chain, /*IsSave=*/true); } SDValue PStateSM; - bool RequiresSMChange = CallAttrs.requiresSMChange(); + bool RequiresSMChange = CallerAttrs.requiresSMChange(CalleeAttrs); if (RequiresSMChange) { - if (CallAttrs.caller().hasStreamingInterfaceOrBody()) + if (CallerAttrs.hasStreamingInterfaceOrBody()) PStateSM = DAG.getConstant(1, DL, MVT::i64); - else if (CallAttrs.caller().hasNonStreamingInterface()) + else if (CallerAttrs.hasNonStreamingInterface()) PStateSM = DAG.getConstant(0, DL, MVT::i64); else PStateSM = getRuntimePStateSM(DAG, Chain, DL, MVT::i64); @@ -9164,7 +9173,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue ZTFrameIdx; MachineFrameInfo &MFI = MF.getFrameInfo(); - bool ShouldPreserveZT0 = CallAttrs.requiresPreservingZT0(); + bool ShouldPreserveZT0 = 
CallerAttrs.requiresPreservingZT0(CalleeAttrs); // If the caller has ZT0 state which will not be preserved by the callee, // spill ZT0 before the call. @@ -9180,7 +9189,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // If caller shares ZT0 but the callee is not shared ZA, we need to stop // PSTATE.ZA before the call if there is no lazy-save active. - bool DisableZA = CallAttrs.requiresDisablingZABeforeCall(); + bool DisableZA = CallerAttrs.requiresDisablingZABeforeCall(CalleeAttrs); assert((!DisableZA || !RequiresLazySave) && "Lazy-save should have PSTATE.SM=1 on entry to the function"); @@ -9463,8 +9472,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } SDValue NewChain = changeStreamingMode( - DAG, DL, CallAttrs.callee().hasStreamingInterface(), Chain, InGlue, - getSMToggleCondition(CallAttrs), PStateSM); + DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue, + getSMCondition(CallerAttrs, CalleeAttrs), PStateSM); Chain = NewChain.getValue(0); InGlue = NewChain.getValue(1); } @@ -9650,8 +9659,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, if (RequiresSMChange) { assert(PStateSM && "Expected a PStateSM to be set"); Result = changeStreamingMode( - DAG, DL, !CallAttrs.callee().hasStreamingInterface(), Result, InGlue, - getSMToggleCondition(CallAttrs), PStateSM); + DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue, + getSMCondition(CallerAttrs, CalleeAttrs), PStateSM); if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { InGlue = Result.getValue(1); @@ -9661,7 +9670,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } } - if (CallAttrs.requiresEnablingZAAfterCall()) + if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs)) // Unconditionally resume ZA. 
Result = DAG.getNode( AArch64ISD::SMSTART, DL, MVT::Other, Result, @@ -18140,70 +18149,6 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N, return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP); } -static SDValue -performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *ST) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (SDValue While = optimizeIncrementingWhile(N, DCI.DAG, /*IsSigned=*/false, - /*IsEqual=*/false)) - return While; - - if (!ST->hasSVE2p1()) - return SDValue(); - - if (!N->hasNUsesOfValue(2, 0)) - return SDValue(); - - const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2; - if (HalfSize < 2) - return SDValue(); - - auto It = N->user_begin(); - SDNode *Lo = *It++; - SDNode *Hi = *It; - - if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR || - Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR) - return SDValue(); - - uint64_t OffLo = Lo->getConstantOperandVal(1); - uint64_t OffHi = Hi->getConstantOperandVal(1); - - if (OffLo > OffHi) { - std::swap(Lo, Hi); - std::swap(OffLo, OffHi); - } - - if (OffLo != 0 || OffHi != HalfSize) - return SDValue(); - - EVT HalfVec = Lo->getValueType(0); - if (HalfVec != Hi->getValueType(0) || - HalfVec.getVectorElementCount() != ElementCount::getScalable(HalfSize)) - return SDValue(); - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - SDValue ID = - DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64); - SDValue Idx = N->getOperand(0); - SDValue TC = N->getOperand(1); - if (Idx.getValueType() != MVT::i64) { - Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64); - TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64); - } - auto R = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, - {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC}); - - DCI.CombineTo(Lo, R.getValue(0)); - DCI.CombineTo(Hi, R.getValue(1)); - - return SDValue(N, 0); -} - // Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce // vecreduce.add(ext(A)) to 
vecreduce.add(DOT(zero, A, one)) // vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B)) @@ -19734,8 +19679,6 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, static bool isPredicateCCSettingOp(SDValue N) { if ((N.getOpcode() == ISD::SETCC) || - // get_active_lane_mask is lowered to a whilelo instruction. - (N.getOpcode() == ISD::GET_ACTIVE_LANE_MASK) || (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN && (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege || N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt || @@ -19744,7 +19687,9 @@ static bool isPredicateCCSettingOp(SDValue N) { N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele || N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo || N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels || - N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt))) + N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt || + // get_active_lane_mask is lowered to a whilelo instruction. 
+ N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask))) return true; return false; @@ -21858,6 +21803,66 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc, return SDValue(); } +static SDValue tryCombineWhileLo(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (!Subtarget->hasSVE2p1()) + return SDValue(); + + if (!N->hasNUsesOfValue(2, 0)) + return SDValue(); + + const uint64_t HalfSize = N->getValueType(0).getVectorMinNumElements() / 2; + if (HalfSize < 2) + return SDValue(); + + auto It = N->user_begin(); + SDNode *Lo = *It++; + SDNode *Hi = *It; + + if (Lo->getOpcode() != ISD::EXTRACT_SUBVECTOR || + Hi->getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + uint64_t OffLo = Lo->getConstantOperandVal(1); + uint64_t OffHi = Hi->getConstantOperandVal(1); + + if (OffLo > OffHi) { + std::swap(Lo, Hi); + std::swap(OffLo, OffHi); + } + + if (OffLo != 0 || OffHi != HalfSize) + return SDValue(); + + EVT HalfVec = Lo->getValueType(0); + if (HalfVec != Hi->getValueType(0) || + HalfVec.getVectorElementCount() != ElementCount::getScalable(HalfSize)) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + SDValue ID = + DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64); + SDValue Idx = N->getOperand(1); + SDValue TC = N->getOperand(2); + if (Idx.getValueType() != MVT::i64) { + Idx = DAG.getZExtOrTrunc(Idx, DL, MVT::i64); + TC = DAG.getZExtOrTrunc(TC, DL, MVT::i64); + } + auto R = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, + {Lo->getValueType(0), Hi->getValueType(0)}, {ID, Idx, TC}); + + DCI.CombineTo(Lo, R.getValue(0)); + DCI.CombineTo(Hi, R.getValue(1)); + + return SDValue(N, 0); +} + SDValue tryLowerPartialReductionToDot(SDNode *N, const AArch64Subtarget *Subtarget, SelectionDAG &DAG) { @@ -22337,8 +22342,7 @@ static SDValue performIntrinsicCombine(SDNode *N, return getPTest(DAG, N->getValueType(0), 
N->getOperand(1), N->getOperand(2), AArch64CC::LAST_ACTIVE); case Intrinsic::aarch64_sve_whilelo: - return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2)); + return tryCombineWhileLo(N, DCI, Subtarget); case Intrinsic::aarch64_sve_bsl: case Intrinsic::aarch64_sve_bsl1n: case Intrinsic::aarch64_sve_bsl2n: @@ -26770,8 +26774,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performExtractVectorEltCombine(N, DCI, Subtarget); case ISD::VECREDUCE_ADD: return performVecReduceAddCombine(N, DCI.DAG, Subtarget); - case ISD::GET_ACTIVE_LANE_MASK: - return performActiveLaneMaskCombine(N, DCI, Subtarget); case AArch64ISD::UADDV: return performUADDVCombine(N, DAG); case AArch64ISD::SMULL: @@ -27754,7 +27756,8 @@ void AArch64TargetLowering::ReplaceNodeResults( DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, RuntimePStateSM)); return; } - case Intrinsic::experimental_vector_match: { + case Intrinsic::experimental_vector_match: + case Intrinsic::get_active_lane_mask: { if (!VT.isFixedLengthVector() || VT.getVectorElementType() != MVT::i1) return; @@ -28166,8 +28169,8 @@ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &, static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) { Module *M = IRB.GetInsertBlock()->getParent()->getParent(); - Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration( - M, Intrinsic::thread_pointer, IRB.getPtrTy()); + Function *ThreadPointerFunc = + Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer); return IRB.CreatePointerCast( IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc), Offset), @@ -28556,10 +28559,12 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const { // Checks to allow the use of SME instructions if (auto *Base = dyn_cast(&Inst)) { - auto CallAttrs = SMECallAttrs(*Base); - if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() || - 
CallAttrs.requiresPreservingZT0() || - CallAttrs.requiresPreservingAllZAState()) + auto CallerAttrs = SMEAttrs(*Inst.getFunction()); + auto CalleeAttrs = SMEAttrs(*Base); + if (CallerAttrs.requiresSMChange(CalleeAttrs) || + CallerAttrs.requiresLazySave(CalleeAttrs) || + CallerAttrs.requiresPreservingZT0(CalleeAttrs) || + CallerAttrs.requiresPreservingAllZAState(CalleeAttrs)) return true; } return false; @@ -29546,29 +29551,6 @@ AArch64TargetLowering::LowerPARTIAL_REDUCE_MLA(SDValue Op, return DAG.getNode(ISD::ADD, DL, ResultVT, Acc, Extended); } -SDValue -AArch64TargetLowering::LowerGET_ACTIVE_LANE_MASK(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - assert(VT.isFixedLengthVector() && "Expected fixed length vector type!"); - - assert(Subtarget->isSVEorStreamingSVEAvailable() && - "Lowering fixed length get_active_lane_mask requires SVE!"); - - // There are no dedicated fixed-length instructions for GET_ACTIVE_LANE_MASK, - // but we can use SVE when available. - - SDLoc DL(Op); - EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT); - EVT WhileVT = ContainerVT.changeElementType(MVT::i1); - - SDValue Mask = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, WhileVT, - Op.getOperand(0), Op.getOperand(1)); - SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt, - DAG.getVectorIdxConstant(0, DL)); -} - SDValue AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index c1e6d70099fa5..ec8b0b920c453 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1183,7 +1183,6 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const; SDValue 
LowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGET_ACTIVE_LANE_MASK(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 5489541fcb318..33241c65a4a37 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -10188,7 +10188,7 @@ multiclass SIMDScalarLShiftDTied opc, string asm, def d : BaseSIMDScalarShiftTied { + (i32 vecshiftR64:$imm)))]> { let Inst{21-16} = imm{5-0}; } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 010c7c391527f..b02a907f7439f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -7358,8 +7358,7 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), // Patterns for i8/i16 -> v2i32/v4i16 lane moves via insert and extract that go via i32. 
multiclass Neon_INS_elt_ext_pattern { + Instruction INS, SDNodeXForm VecIndexMult> { // VT64->OutVT def : Pat<(OutVT (vector_insert (OutVT V64:$src), (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))), @@ -7370,10 +7369,8 @@ multiclass Neon_INS_elt_ext_pattern; def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))), (EXTRACT_SUBREG - (VT128 (SUBREG_TO_REG - (i64 0), - (DUP (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn), - DUPSub)), + (INS (IMPLICIT_DEF), 0, + (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn), dsub)>; // VT128->OutVT @@ -7386,38 +7383,25 @@ multiclass Neon_INS_elt_ext_pattern; def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))), (EXTRACT_SUBREG - (VT128 (SUBREG_TO_REG - (i64 0), - (DUP V128:$Rn, imm:$Immn), - DUPSub)), + (INS (IMPLICIT_DEF), 0, V128:$Rn, imm:$Immn), dsub)>; } -defm : Neon_INS_elt_ext_pattern; -defm : Neon_INS_elt_ext_pattern; -defm : Neon_INS_elt_ext_pattern; +defm : Neon_INS_elt_ext_pattern; +defm : Neon_INS_elt_ext_pattern; +defm : Neon_INS_elt_ext_pattern; // bitcast of an extract -// f32 bitcast(vector_extract(v4i32 src, 0)) -> EXTRACT_SUBREG(src) -def : Pat<(f32 (bitconvert (i32 (vector_extract v16i8:$src, (i64 0))))), - (EXTRACT_SUBREG V128:$src, bsub)>; -def : Pat<(f32 (bitconvert (i32 (vector_extract v8i16:$src, (i64 0))))), - (EXTRACT_SUBREG V128:$src, hsub)>; +// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane)) +def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), + (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>; def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))), (EXTRACT_SUBREG V128:$src, ssub)>; +def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))), + (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>; def : Pat<(f64 (bitconvert (i64 
(vector_extract v2i64:$src, (i64 0))))), (EXTRACT_SUBREG V128:$src, dsub)>; -// f32 bitcast(vector_extract(v4i32 src, lane)) -> DUPi32(src, lane) -def : Pat<(f32 (bitconvert (i32 (vector_extract v16i8:$src, imm:$Immd)))), - (EXTRACT_SUBREG (v16i8 (SUBREG_TO_REG (i64 0), (DUPi8 V128:$src, imm:$Immd), bsub)), ssub)>; -def : Pat<(f32 (bitconvert (i32 (vector_extract v8i16:$src, imm:$Immd)))), - (EXTRACT_SUBREG (v8i16 (SUBREG_TO_REG (i64 0), (DUPi16 V128:$src, imm:$Immd), hsub)), ssub)>; -def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), - (DUPi32 V128:$src, imm:$Immd)>; -def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))), - (DUPi64 V128:$src, imm:$Immd)>; - // Floating point vector extractions are codegen'd as either a sequence of // subregister extractions, or a MOV (aka DUP here) if // the lane number is anything other than zero. diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index a4bcd6847c4f0..d6bd59adef03b 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2141,12 +2141,12 @@ let Predicates = [HasSVE_or_SME] in { defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>; defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele, null_frag>; - defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", get_active_lane_mask, int_aarch64_sve_whilehi>; + defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>; defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels, null_frag>; defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>; defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele, null_frag>; - defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", get_active_lane_mask, 
int_aarch64_sve_whilehi>; + defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>; defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels, null_frag>; def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>; @@ -3459,10 +3459,16 @@ let Predicates = [HasSVE_or_SME] in { // Alternative case where insertelement is just scalar_to_vector rather than vector_insert. def : Pat<(v1f64 (scalar_to_vector (f64 (vector_extract nxv2f64:$vec, VectorIndexD:$index)))), - (DUPi64 (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index)>; + (EXTRACT_SUBREG + (INSvi64lane (IMPLICIT_DEF), (i64 0), + (EXTRACT_SUBREG nxv2f64:$vec, zsub), VectorIndexD:$index), + dsub)>; def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)))), - (DUPi64 (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index)>; + (EXTRACT_SUBREG + (INSvi64lane (IMPLICIT_DEF), (i64 0), + (EXTRACT_SUBREG nxv2i64:$vec, zsub), VectorIndexD:$index), + dsub)>; } // End HasNEON let Predicates = [HasNEON] in { @@ -3992,12 +3998,12 @@ let Predicates = [HasSVE2_or_SME] in { defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege, null_frag>; defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>; defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs, null_frag>; - defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi, get_active_lane_mask>; + defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>; defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege, null_frag>; defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>; defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs, null_frag>; - defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", 
int_aarch64_sve_whilehi, get_active_lane_mask>; + defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>; // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 97f8569b63529..97e4993d52b4f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -268,21 +268,22 @@ const FeatureBitset AArch64TTIImpl::InlineInverseFeatures = { bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { - SMECallAttrs CallAttrs(*Caller, *Callee); + SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee); // When inlining, we should consider the body of the function, not the // interface. - if (CallAttrs.callee().hasStreamingBody()) { - CallAttrs.callee().set(SMEAttrs::SM_Compatible, false); - CallAttrs.callee().set(SMEAttrs::SM_Enabled, true); + if (CalleeAttrs.hasStreamingBody()) { + CalleeAttrs.set(SMEAttrs::SM_Compatible, false); + CalleeAttrs.set(SMEAttrs::SM_Enabled, true); } - if (CallAttrs.callee().isNewZA() || CallAttrs.callee().isNewZT0()) + if (CalleeAttrs.isNewZA() || CalleeAttrs.isNewZT0()) return false; - if (CallAttrs.requiresLazySave() || CallAttrs.requiresSMChange() || - CallAttrs.requiresPreservingZT0() || - CallAttrs.requiresPreservingAllZAState()) { + if (CallerAttrs.requiresLazySave(CalleeAttrs) || + CallerAttrs.requiresSMChange(CalleeAttrs) || + CallerAttrs.requiresPreservingZT0(CalleeAttrs) || + CallerAttrs.requiresPreservingAllZAState(CalleeAttrs)) { if (hasPossibleIncompatibleOps(Callee)) return false; } @@ -348,14 +349,12 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call, // streaming-mode change, and the call to G from F would also require a // streaming-mode change, then there is 
benefit to do the streaming-mode // change only once and avoid inlining of G into F. - SMEAttrs FAttrs(*F); - SMECallAttrs CallAttrs(Call); - - if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) { + SMEAttrs CalleeAttrs(Call); + if (FAttrs.requiresSMChange(CalleeAttrs)) { if (F == Call.getCaller()) // (1) return CallPenaltyChangeSM * DefaultCallPenalty; - if (SMECallAttrs(FAttrs, CallAttrs.caller()).requiresSMChange()) // (2) + if (FAttrs.requiresSMChange(SMEAttrs(*Call.getCaller()))) // (2) return InlineCallPenaltyChangeSM * DefaultCallPenalty; } diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp index 271094f935e0e..76d2ac6a601e5 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp @@ -27,14 +27,15 @@ void SMEAttrs::set(unsigned M, bool Enable) { "ZA_New and SME_ABI_Routine are mutually exclusive"); assert( - (isNewZA() + isInZA() + isOutZA() + isInOutZA() + isPreservesZA()) <= 1 && + (!sharesZA() || + (isNewZA() ^ isInZA() ^ isInOutZA() ^ isOutZA() ^ isPreservesZA())) && "Attributes 'aarch64_new_za', 'aarch64_in_za', 'aarch64_out_za', " "'aarch64_inout_za' and 'aarch64_preserves_za' are mutually exclusive"); // ZT0 Attrs assert( - (isNewZT0() + isInZT0() + isOutZT0() + isInOutZT0() + isPreservesZT0()) <= - 1 && + (!sharesZT0() || (isNewZT0() ^ isInZT0() ^ isInOutZT0() ^ isOutZT0() ^ + isPreservesZT0())) && "Attributes 'aarch64_new_zt0', 'aarch64_in_zt0', 'aarch64_out_zt0', " "'aarch64_inout_zt0' and 'aarch64_preserves_zt0' are mutually exclusive"); @@ -43,6 +44,27 @@ void SMEAttrs::set(unsigned M, bool Enable) { "interface"); } +SMEAttrs::SMEAttrs(const CallBase &CB) { + *this = SMEAttrs(CB.getAttributes()); + if (auto *F = CB.getCalledFunction()) { + set(SMEAttrs(*F).Bitmask | SMEAttrs(F->getName()).Bitmask); + } +} + +SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) { + if (FuncName == 
"__arm_tpidr2_save" || FuncName == "__arm_sme_state") + Bitmask |= (SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine); + if (FuncName == "__arm_tpidr2_restore") + Bitmask |= SMEAttrs::SM_Compatible | encodeZAState(StateValue::In) | + SMEAttrs::SME_ABI_Routine; + if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" || + FuncName == "__arm_sc_memmove" || FuncName == "__arm_sc_memchr") + Bitmask |= SMEAttrs::SM_Compatible; + if (FuncName == "__arm_sme_save" || FuncName == "__arm_sme_restore" || + FuncName == "__arm_sme_state_size") + Bitmask |= SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine; +} + SMEAttrs::SMEAttrs(const AttributeList &Attrs) { Bitmask = 0; if (Attrs.hasFnAttr("aarch64_pstate_sm_enabled")) @@ -77,48 +99,17 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) { Bitmask |= encodeZT0State(StateValue::New); } -void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName) { - unsigned KnownAttrs = SMEAttrs::Normal; - if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state") - KnownAttrs |= (SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine); - if (FuncName == "__arm_tpidr2_restore") - KnownAttrs |= SMEAttrs::SM_Compatible | encodeZAState(StateValue::In) | - SMEAttrs::SME_ABI_Routine; - if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" || - FuncName == "__arm_sc_memmove" || FuncName == "__arm_sc_memchr") - KnownAttrs |= SMEAttrs::SM_Compatible; - if (FuncName == "__arm_sme_save" || FuncName == "__arm_sme_restore" || - FuncName == "__arm_sme_state_size") - KnownAttrs |= SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine; - set(KnownAttrs); -} - -bool SMECallAttrs::requiresSMChange() const { - if (callee().hasStreamingCompatibleInterface()) +bool SMEAttrs::requiresSMChange(const SMEAttrs &Callee) const { + if (Callee.hasStreamingCompatibleInterface()) return false; // Both non-streaming - if (caller().hasNonStreamingInterfaceAndBody() && - callee().hasNonStreamingInterface()) + if 
(hasNonStreamingInterfaceAndBody() && Callee.hasNonStreamingInterface()) return false; // Both streaming - if (caller().hasStreamingInterfaceOrBody() && - callee().hasStreamingInterface()) + if (hasStreamingInterfaceOrBody() && Callee.hasStreamingInterface()) return false; return true; } - -SMECallAttrs::SMECallAttrs(const CallBase &CB) - : CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal), - Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) { - if (auto *CalledFunction = CB.getCalledFunction()) - CalledFn = SMEAttrs(*CalledFunction, SMEAttrs::InferAttrsFromName::Yes); - - // FIXME: We probably should not allow SME attributes on direct calls but - // clang duplicates streaming mode attributes at each callsite. - assert((IsIndirect || - ((Callsite.withoutPerCallsiteFlags() | CalledFn) == CalledFn)) && - "SME attributes at callsite do not match declaration"); -} diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h index f1be0ecbee7ed..1691d4fec8b68 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h @@ -18,9 +18,12 @@ class CallBase; class AttributeList; /// SMEAttrs is a utility class to parse the SME ACLE attributes on functions. -/// It helps determine a function's requirements for PSTATE.ZA and PSTATE.SM. +/// It helps determine a function's requirements for PSTATE.ZA and PSTATE.SM. It +/// has interfaces to query whether a streaming mode change or lazy-save +/// mechanism is required when going from one function to another (e.g. through +/// a call). 
class SMEAttrs { - unsigned Bitmask = Normal; + unsigned Bitmask; public: enum class StateValue { @@ -40,25 +43,18 @@ class SMEAttrs { SM_Body = 1 << 2, // aarch64_pstate_sm_body SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves ZA_State_Agnostic = 1 << 4, - ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills + ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills ZA_Shift = 6, ZA_Mask = 0b111 << ZA_Shift, ZT0_Shift = 9, - ZT0_Mask = 0b111 << ZT0_Shift, - CallSiteFlags_Mask = ZT0_Undef + ZT0_Mask = 0b111 << ZT0_Shift }; - enum class InferAttrsFromName { No, Yes }; - - SMEAttrs() = default; - SMEAttrs(unsigned Mask) { set(Mask); } - SMEAttrs(const Function &F, InferAttrsFromName Infer = InferAttrsFromName::No) - : SMEAttrs(F.getAttributes()) { - if (Infer == InferAttrsFromName::Yes) - addKnownFunctionAttrs(F.getName()); - } + SMEAttrs(unsigned Mask = Normal) : Bitmask(0) { set(Mask); } + SMEAttrs(const Function &F) : SMEAttrs(F.getAttributes()) {} + SMEAttrs(const CallBase &CB); SMEAttrs(const AttributeList &L); - SMEAttrs(StringRef FuncName) { addKnownFunctionAttrs(FuncName); }; + SMEAttrs(StringRef FuncName); void set(unsigned M, bool Enable = true); @@ -78,6 +74,10 @@ class SMEAttrs { return hasNonStreamingInterface() && !hasStreamingBody(); } + /// \return true if a call from Caller -> Callee requires a change in + /// streaming mode. 
+ bool requiresSMChange(const SMEAttrs &Callee) const; + // Interfaces to query ZA static StateValue decodeZAState(unsigned Bitmask) { return static_cast((Bitmask & ZA_Mask) >> ZA_Shift); @@ -104,7 +104,10 @@ class SMEAttrs { return !hasSharedZAInterface() && !hasAgnosticZAInterface(); } bool hasZAState() const { return isNewZA() || sharesZA(); } - bool isSMEABIRoutine() const { return Bitmask & SME_ABI_Routine; } + bool requiresLazySave(const SMEAttrs &Callee) const { + return hasZAState() && Callee.hasPrivateZAInterface() && + !(Callee.Bitmask & SME_ABI_Routine); + } // Interfaces to query ZT0 State static StateValue decodeZT0State(unsigned Bitmask) { @@ -123,83 +126,27 @@ class SMEAttrs { bool isPreservesZT0() const { return decodeZT0State(Bitmask) == StateValue::Preserved; } - bool hasUndefZT0() const { return Bitmask & ZT0_Undef; } + bool isUndefZT0() const { return Bitmask & ZT0_Undef; } bool sharesZT0() const { StateValue State = decodeZT0State(Bitmask); return State == StateValue::In || State == StateValue::Out || State == StateValue::InOut || State == StateValue::Preserved; } bool hasZT0State() const { return isNewZT0() || sharesZT0(); } - - SMEAttrs operator|(SMEAttrs Other) const { - SMEAttrs Merged(*this); - Merged.set(Other.Bitmask); - return Merged; + bool requiresPreservingZT0(const SMEAttrs &Callee) const { + return hasZT0State() && !Callee.isUndefZT0() && !Callee.sharesZT0() && + !Callee.hasAgnosticZAInterface(); } - - SMEAttrs withoutPerCallsiteFlags() const { - return (Bitmask & ~CallSiteFlags_Mask); + bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const { + return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() && + !(Callee.Bitmask & SME_ABI_Routine); } - - bool operator==(SMEAttrs const &Other) const { - return Bitmask == Other.Bitmask; + bool requiresEnablingZAAfterCall(const SMEAttrs &Callee) const { + return requiresLazySave(Callee) || requiresDisablingZABeforeCall(Callee); } - -private: - void 
addKnownFunctionAttrs(StringRef FuncName); -}; - -/// SMECallAttrs is a utility class to hold the SMEAttrs for a callsite. It has -/// interfaces to query whether a streaming mode change or lazy-save mechanism -/// is required when going from one function to another (e.g. through a call). -class SMECallAttrs { - SMEAttrs CallerFn; - SMEAttrs CalledFn; - SMEAttrs Callsite; - bool IsIndirect = false; - -public: - SMECallAttrs(SMEAttrs Caller, SMEAttrs Callee, - SMEAttrs Callsite = SMEAttrs::Normal) - : CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {} - - SMECallAttrs(const CallBase &CB); - - SMEAttrs &caller() { return CallerFn; } - SMEAttrs &callee() { return IsIndirect ? Callsite : CalledFn; } - SMEAttrs &callsite() { return Callsite; } - SMEAttrs const &caller() const { return CallerFn; } - SMEAttrs const &callee() const { - return const_cast(this)->callee(); - } - SMEAttrs const &callsite() const { return Callsite; } - - /// \return true if a call from Caller -> Callee requires a change in - /// streaming mode. 
- bool requiresSMChange() const; - - bool requiresLazySave() const { - return caller().hasZAState() && callee().hasPrivateZAInterface() && - !callee().isSMEABIRoutine(); - } - - bool requiresPreservingZT0() const { - return caller().hasZT0State() && !callsite().hasUndefZT0() && - !callee().sharesZT0() && !callee().hasAgnosticZAInterface(); - } - - bool requiresDisablingZABeforeCall() const { - return caller().hasZT0State() && !caller().hasZAState() && - callee().hasPrivateZAInterface() && !callee().isSMEABIRoutine(); - } - - bool requiresEnablingZAAfterCall() const { - return requiresLazySave() || requiresDisablingZABeforeCall(); - } - - bool requiresPreservingAllZAState() const { - return caller().hasAgnosticZAInterface() && - !callee().hasAgnosticZAInterface() && !callee().isSMEABIRoutine(); + bool requiresPreservingAllZAState(const SMEAttrs &Callee) const { + return hasAgnosticZAInterface() && !Callee.hasAgnosticZAInterface() && + !(Callee.Bitmask & SME_ABI_Routine); } }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index f79069bd6d78b..34ba53cbe0f9e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -1038,12 +1038,6 @@ class AMDGPULowerModuleLDS { } bool runOnModule(Module &M) { - // Check if we've already lowered this module. The pass may run more - // than once in the LTO pipeline, and multiple runs aren't supported. 
- if (M.getModuleFlag("amdgpu.lowered_lds")) - return false; - M.addModuleFlag(Module::ModFlagBehavior::Error, "amdgpu.lowered_lds", 1); - CallGraph CG = CallGraph(M); bool Changed = superAlignLDSGlobals(M); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 6993095193467..79667e5ff9285 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1972,25 +1972,81 @@ class getIns64 { - dag src0 = !if(!ge(NumSrcArgs, 1), - !if (HasModifiers, - (ins Src0Mod:$src0_modifiers, Src0RC:$src0), - (ins Src0RC:$src0)), - (ins)); - dag src1 = !if(!ge(NumSrcArgs, 2), - !if (HasModifiers, - (ins Src1Mod:$src1_modifiers, Src1RC:$src1), - (ins Src1RC:$src1)), - (ins)); - dag src2 = !if(!ge(NumSrcArgs, 3), - !if (HasSrc2Mods, - (ins Src2Mod:$src2_modifiers, Src2RC:$src2), - (ins Src2RC:$src2)), - (ins)); - dag clamp = !if(HasClamp, (ins Clamp0:$clamp), (ins)); - dag omod = !if(HasOMod, (ins omod0:$omod), (ins)); - - dag ret = !con(src0, src1, src2, clamp, omod); + + dag ret = + !if (!eq(NumSrcArgs, 0), + // VOP1 without input operands (V_NOP, V_CLREXCP) + (ins), + /* else */ + !if (!eq(NumSrcArgs, 1), + !if (HasModifiers, + // VOP1 with modifiers + !if(HasOMod, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Clamp0:$clamp, omod0:$omod), + !if (HasClamp, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Clamp0:$clamp), + (ins Src0Mod:$src0_modifiers, Src0RC:$src0))) + /* else */, + // VOP1 without modifiers + !if(HasOMod, + (ins Src0RC:$src0, Clamp0:$clamp, omod0:$omod), + !if (HasClamp, + (ins Src0RC:$src0, Clamp0:$clamp), + (ins Src0RC:$src0))) + /* endif */ ), + !if (!eq(NumSrcArgs, 2), + !if (HasModifiers, + // VOP 2 with modifiers + !if(HasOMod, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Clamp0:$clamp, omod0:$omod), + !con((ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1), + !if(HasClamp, (ins Clamp0:$clamp), (ins)))) + 
/* else */, + // VOP2 without modifiers + !if (HasClamp, + (ins Src0RC:$src0, Src1RC:$src1, Clamp0:$clamp), + (ins Src0RC:$src0, Src1RC:$src1)) + + /* endif */ ) + /* NumSrcArgs == 3 */, + !if (HasModifiers, + !if (HasSrc2Mods, + // VOP3 with modifiers + !if (HasOMod, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2, + Clamp0:$clamp, omod0:$omod), + !if (HasClamp, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2, + Clamp0:$clamp), + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2))), + // VOP3 with modifiers except src2 + !if (HasOMod, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2RC:$src2, Clamp0:$clamp, omod0:$omod), + !if (HasClamp, + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2RC:$src2, Clamp0:$clamp), + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2RC:$src2)))) + /* else */, + // VOP3 without modifiers + !if (HasClamp, + (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, Clamp0:$clamp), + (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)) + /* endif */ )))); } class getInsVOP3Base Default, bool OnlyFirstRequired) { if (auto Attr = getIntegerPairAttribute(F, Name, OnlyFirstRequired)) - return {Attr->first, Attr->second.value_or(Default.second)}; + return {Attr->first, Attr->second ? 
*(Attr->second) : Default.second}; return Default; } diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 0c7e20fc1ebf3..30cef69aa29c4 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -439,7 +439,7 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v // Src2 must accept the same operand types as vdst, namely VGPRs only let Src2RC64 = getVOP3VRegForVT.ret; let Ins64 = getIns64.ret; let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, Src1ModDPP:$src1_modifiers, Src1DPP:$src1, @@ -448,7 +448,7 @@ class VOP_MAC : VOPProfile <[vt0, vt1, vt1, v DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi)); let InsVOP3Base = getInsVOP3Base, 3, - HasClamp, HasModifiers, HasModifiers, HasOMod, + 0, HasModifiers, HasModifiers, HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret; // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp index 8bdaf68e18e70..b50a9b5d6051c 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp @@ -142,13 +142,6 @@ void ModuleShaderFlags::updateFunctionFlags(ComputedShaderFlags &CSF, } } - if (CSF.LowPrecisionPresent) { - if (CanSetNativeLowPrecisionMode) - CSF.NativeLowPrecision = true; - else - CSF.MinimumPrecision = true; - } - if (!CSF.Int64Ops) CSF.Int64Ops = I.getType()->isIntegerTy(64); @@ -213,20 +206,13 @@ void ModuleShaderFlags::initialize(Module &M, DXILResourceTypeMap &DRTM, const ModuleMetadataInfo &MMDI) { CanSetResMayNotAlias = MMDI.DXILVersion >= VersionTuple(1, 7); - // The command line option -res-may-alias will set the dx.resmayalias module - // flag to 1, thereby disabling the ability to set the ResMayNotAlias 
flag - if (auto *ResMayAlias = mdconst::extract_or_null( - M.getModuleFlag("dx.resmayalias"))) - CanSetResMayNotAlias = !ResMayAlias->getValue().getBoolValue(); - // NativeLowPrecisionMode can only be set when the command line option - // -enable-16bit-types is provided. This is indicated by the dx.nativelowprec - // module flag being set - CanSetNativeLowPrecisionMode = false; - if (auto *NativeLowPrec = mdconst::extract_or_null( - M.getModuleFlag("dx.nativelowprec"))) - if (MMDI.ShaderModelVersion >= VersionTuple(6, 2)) - CanSetNativeLowPrecisionMode = NativeLowPrec->getValue().getBoolValue(); + // Check if -res-may-alias was provided on the command line. + // The command line option will set the dx.resmayalias module flag to 1. + if (auto *RMA = mdconst::extract_or_null( + M.getModuleFlag("dx.resmayalias"))) + if (RMA->getValue() != 0) + CanSetResMayNotAlias = false; CallGraph CG(M); @@ -252,6 +238,18 @@ void ModuleShaderFlags::initialize(Module &M, DXILResourceTypeMap &DRTM, continue; } + // Set ResMayNotAlias to true if DXIL validator version < 1.8 and there + // are UAVs present globally. + if (CanSetResMayNotAlias && MMDI.ValidatorVersion < VersionTuple(1, 8)) + SCCSF.ResMayNotAlias = !DRM.uavs().empty(); + + // Set UseNativeLowPrecision using dx.nativelowprec module metadata + if (auto *NativeLowPrec = mdconst::extract_or_null( + M.getModuleFlag("dx.nativelowprec"))) + if (MMDI.ShaderModelVersion >= VersionTuple(6, 2) && + NativeLowPrec->getValue() != 0) + SCCSF.UseNativeLowPrecision = true; + ComputedShaderFlags CSF; for (const auto &BB : *F) for (const auto &I : BB) @@ -288,17 +286,6 @@ void ModuleShaderFlags::initialize(Module &M, DXILResourceTypeMap &DRTM, *(EntryFunProps.Entry), "Inconsistent optnone attribute ")); } - // Set ResMayNotAlias to true if DXIL validator version < 1.8 and there - // are UAVs present globally. 
- if (CanSetResMayNotAlias && MMDI.ValidatorVersion < VersionTuple(1, 8)) - CombinedSFMask.ResMayNotAlias = !DRM.uavs().empty(); - - // Set the module flag that enables native low-precision execution mode. This - // is needed even if the module does not use 16-bit types because a - // corresponding debug module may include 16-bit types, and tools that use the - // debug module may expect it to have the same flags as the original - CombinedSFMask.NativeLowPrecisionMode = CanSetNativeLowPrecisionMode; - // Set the Max64UAVs flag if the number of UAVs is > 8 uint32_t NumUAVs = 0; for (auto &UAV : DRM.uavs()) diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.h b/llvm/lib/Target/DirectX/DXILShaderFlags.h index c4eef4e708cfd..0e0bd0036349e 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.h +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.h @@ -91,10 +91,7 @@ struct ModuleShaderFlags { const ComputedShaderFlags &getCombinedFlags() const { return CombinedSFMask; } private: - // Booleans set by module flags - bool CanSetResMayNotAlias; // dx.resmayalias - bool CanSetNativeLowPrecisionMode; // dx.nativelowprec - + bool CanSetResMayNotAlias; /// Map of Function-Shader Flag Mask pairs representing properties of each of /// the functions in the module. 
Shader Flags of each function represent both /// module-level and function-level flags diff --git a/llvm/lib/Target/M68k/CMakeLists.txt b/llvm/lib/Target/M68k/CMakeLists.txt index 7005df4fb8a82..1661dccece3dd 100644 --- a/llvm/lib/Target/M68k/CMakeLists.txt +++ b/llvm/lib/Target/M68k/CMakeLists.txt @@ -6,7 +6,6 @@ tablegen(LLVM M68kGenGlobalISel.inc -gen-global-isel) tablegen(LLVM M68kGenRegisterInfo.inc -gen-register-info) tablegen(LLVM M68kGenRegisterBank.inc -gen-register-bank) tablegen(LLVM M68kGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM M68kGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM M68kGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM M68kGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM M68kGenMCPseudoLowering.inc -gen-pseudo-lowering) @@ -33,7 +32,6 @@ add_llvm_target(M68kCodeGen M68kMachineFunction.cpp M68kMCInstLower.cpp M68kRegisterInfo.cpp - M68kSelectionDAGInfo.cpp M68kSubtarget.cpp M68kTargetMachine.cpp M68kTargetObjectFile.cpp diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp index 9c3d61ec60e00..53c144c8fa79a 100644 --- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp +++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp @@ -15,8 +15,8 @@ #include "M68kMachineFunction.h" #include "M68kRegisterInfo.h" -#include "M68kSelectionDAGInfo.h" #include "M68kTargetMachine.h" + #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp index 9d3ab606ab8cd..616b1f622619c 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.cpp +++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp @@ -15,7 +15,6 @@ #include "M68kISelLowering.h" #include "M68kCallingConv.h" #include "M68kMachineFunction.h" -#include "M68kSelectionDAGInfo.h" #include "M68kSubtarget.h" #include "M68kTargetMachine.h" #include "M68kTargetObjectFile.h" @@ -3642,6 +3641,64 @@ SDValue 
M68kTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } +//===----------------------------------------------------------------------===// +// M68kISD Node Names +//===----------------------------------------------------------------------===// +const char *M68kTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case M68kISD::CALL: + return "M68kISD::CALL"; + case M68kISD::TAIL_CALL: + return "M68kISD::TAIL_CALL"; + case M68kISD::RET: + return "M68kISD::RET"; + case M68kISD::TC_RETURN: + return "M68kISD::TC_RETURN"; + case M68kISD::ADD: + return "M68kISD::ADD"; + case M68kISD::SUB: + return "M68kISD::SUB"; + case M68kISD::ADDX: + return "M68kISD::ADDX"; + case M68kISD::SUBX: + return "M68kISD::SUBX"; + case M68kISD::SMUL: + return "M68kISD::SMUL"; + case M68kISD::UMUL: + return "M68kISD::UMUL"; + case M68kISD::OR: + return "M68kISD::OR"; + case M68kISD::XOR: + return "M68kISD::XOR"; + case M68kISD::AND: + return "M68kISD::AND"; + case M68kISD::CMP: + return "M68kISD::CMP"; + case M68kISD::BTST: + return "M68kISD::BTST"; + case M68kISD::SELECT: + return "M68kISD::SELECT"; + case M68kISD::CMOV: + return "M68kISD::CMOV"; + case M68kISD::BRCOND: + return "M68kISD::BRCOND"; + case M68kISD::SETCC: + return "M68kISD::SETCC"; + case M68kISD::SETCC_CARRY: + return "M68kISD::SETCC_CARRY"; + case M68kISD::GLOBAL_BASE_REG: + return "M68kISD::GLOBAL_BASE_REG"; + case M68kISD::Wrapper: + return "M68kISD::Wrapper"; + case M68kISD::WrapperPC: + return "M68kISD::WrapperPC"; + case M68kISD::SEG_ALLOCA: + return "M68kISD::SEG_ALLOCA"; + default: + return NULL; + } +} + CCAssignFn *M68kTargetLowering::getCCAssignFn(CallingConv::ID CC, bool Return, bool IsVarArg) const { if (Return) diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h index 3774f93c851e8..b646f7d7fb2ba 100644 --- a/llvm/lib/Target/M68k/M68kISelLowering.h +++ b/llvm/lib/Target/M68k/M68kISelLowering.h @@ -25,6 +25,77 @@ #include namespace 
llvm { +namespace M68kISD { + +/// M68k Specific DAG nodes +enum NodeType { + /// Start the numbering from where ISD NodeType finishes. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + CALL, + RET, + TAIL_CALL, + TC_RETURN, + + /// M68k compare and logical compare instructions. Subtracts the source + /// operand from the destination data register and sets the condition + /// codes according to the result. Immediate always goes first. + CMP, + + /// M68k bit-test instructions. + BTST, + + /// M68k Select + SELECT, + + /// M68k SetCC. Operand 0 is condition code, and operand 1 is the CCR + /// operand, usually produced by a CMP instruction. + SETCC, + + // Same as SETCC except it's materialized with a subx and the value is all + // one's or all zero's. + SETCC_CARRY, // R = carry_bit ? ~0 : 0 + + /// M68k conditional moves. Operand 0 and operand 1 are the two values + /// to select from. Operand 2 is the condition code, and operand 3 is the + /// flag operand produced by a CMP or TEST instruction. It also writes a + /// flag result. + CMOV, + + /// M68k conditional branches. Operand 0 is the chain operand, operand 1 + /// is the block to branch if condition is true, operand 2 is the + /// condition code, and operand 3 is the flag operand produced by a CMP + /// or TEST instruction. + BRCOND, + + // Arithmetic operations with CCR results. + ADD, + SUB, + ADDX, + SUBX, + SMUL, + UMUL, + OR, + XOR, + AND, + + // GlobalBaseReg, + GLOBAL_BASE_REG, + + /// A wrapper node for TargetConstantPool, + /// TargetExternalSymbol, and TargetGlobalAddress. + Wrapper, + + /// Special wrapper used under M68k PIC mode for PC + /// relative displacements. + WrapperPC, + + // For allocating variable amounts of stack space when using + // segmented stacks. Check if the current stacklet has enough space, and + // falls back to heap allocation if not. + SEG_ALLOCA, +}; +} // namespace M68kISD /// Define some predicates that are used for node matching. 
namespace M68k { @@ -53,6 +124,8 @@ class M68kTargetLowering : public TargetLowering { static const M68kTargetLowering *create(const M68kTargetMachine &TM, const M68kSubtarget &STI); + const char *getTargetNodeName(unsigned Opcode) const override; + /// Return the value type to use for ISD::SETCC. EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td index e9213f20c68e7..dca774e94b9b5 100644 --- a/llvm/lib/Target/M68k/M68kInstrInfo.td +++ b/llvm/lib/Target/M68k/M68kInstrInfo.td @@ -112,18 +112,9 @@ def MxRet : SDNode<"M68kISD::RET", MxSDT_Ret, def MxTCRet : SDNode<"M68kISD::TC_RETURN", MxSDT_TCRet, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def MxGlobalBaseReg : SDNode<"M68kISD::GLOBAL_BASE_REG", - SDTypeProfile<1, 0, [SDTCisVT<0, iPTR>]>>; - -// A wrapper node for TargetConstantPool, -// TargetExternalSymbol, and TargetGlobalAddress. def MxWrapper : SDNode<"M68kISD::Wrapper", MxSDT_Wrapper>; - -// Special wrapper used under M68k PIC mode for PC -// relative displacements. def MxWrapperPC : SDNode<"M68kISD::WrapperPC", MxSDT_Wrapper>; -// Arithmetic operations with CCR results. def MxAdd : SDNode<"M68kISD::ADD", MxSDT_BiArithCCROut, [SDNPCommutative]>; def MxSub : SDNode<"M68kISD::SUB", MxSDT_BiArithCCROut>; def MxOr : SDNode<"M68kISD::OR", MxSDT_BiArithCCROut, [SDNPCommutative]>; @@ -136,37 +127,15 @@ def MxSubX : SDNode<"M68kISD::SUBX", MxSDT_BiArithCCRInOut>; def MxSMul : SDNode<"M68kISD::SMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>; def MxUMul : SDNode<"M68kISD::UMUL", MxSDT_BiArithCCROut, [SDNPCommutative]>; -// M68k compare and logical compare instructions. Subtracts the source -// operand from the destination data register and sets the condition -// codes according to the result. Immediate always goes first. def MxCmp : SDNode<"M68kISD::CMP", MxSDT_CmpTest>; - -// M68k bit-test instructions. 
def MxBtst : SDNode<"M68kISD::BTST", MxSDT_CmpTest>; -// M68k conditional moves. Operand 0 and operand 1 are the two values -// to select from. Operand 2 is the condition code, and operand 3 is the -// flag operand produced by a CMP or TEST instruction. It also writes a -// flag result. def MxCmov : SDNode<"M68kISD::CMOV", MxSDT_Cmov>; - -// M68k conditional branches. Operand 0 is the chain operand, operand 1 -// is the block to branch if condition is true, operand 2 is the -// condition code, and operand 3 is the flag operand produced by a CMP -// or TEST instruction. def MxBrCond : SDNode<"M68kISD::BRCOND", MxSDT_BrCond, [SDNPHasChain]>; - -// M68k SetCC. Operand 0 is condition code, and operand 1 is the CCR -// operand, usually produced by a CMP instruction. def MxSetCC : SDNode<"M68kISD::SETCC", MxSDT_SetCC>; - -// Same as SETCC except it's materialized with a subx and the value is all -// one's or all zero's. def MxSetCC_C : SDNode<"M68kISD::SETCC_CARRY", MxSDT_SetCC_C>; -// For allocating variable amounts of stack space when using -// segmented stacks. Check if the current stacklet has enough space, and -// falls back to heap allocation if not. + def MxSegAlloca : SDNode<"M68kISD::SEG_ALLOCA", MxSDT_SEG_ALLOCA, [SDNPHasChain]>; diff --git a/llvm/lib/Target/M68k/M68kSelectionDAGInfo.cpp b/llvm/lib/Target/M68k/M68kSelectionDAGInfo.cpp deleted file mode 100644 index dd1bfdf00af8c..0000000000000 --- a/llvm/lib/Target/M68k/M68kSelectionDAGInfo.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "M68kSelectionDAGInfo.h" - -#define GET_SDNODE_DESC -#include "M68kGenSDNodeInfo.inc" - -using namespace llvm; - -M68kSelectionDAGInfo::M68kSelectionDAGInfo() - : SelectionDAGGenTargetInfo(M68kGenSDNodeInfo) {} - -M68kSelectionDAGInfo::~M68kSelectionDAGInfo() = default; diff --git a/llvm/lib/Target/M68k/M68kSelectionDAGInfo.h b/llvm/lib/Target/M68k/M68kSelectionDAGInfo.h deleted file mode 100644 index 87a8c08d2591e..0000000000000 --- a/llvm/lib/Target/M68k/M68kSelectionDAGInfo.h +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_M68K_M68KSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_M68K_M68KSELECTIONDAGINFO_H - -#include "llvm/CodeGen/SelectionDAGTargetInfo.h" - -#define GET_SDNODE_ENUM -#include "M68kGenSDNodeInfo.inc" - -namespace llvm { - -class M68kSelectionDAGInfo : public SelectionDAGGenTargetInfo { -public: - M68kSelectionDAGInfo(); - - ~M68kSelectionDAGInfo() override; -}; - -} // namespace llvm - -#endif // LLVM_LIB_TARGET_M68K_M68KSELECTIONDAGINFO_H diff --git a/llvm/lib/Target/M68k/M68kSubtarget.cpp b/llvm/lib/Target/M68k/M68kSubtarget.cpp index 59d865ff1f4a9..53ec574ae5596 100644 --- a/llvm/lib/Target/M68k/M68kSubtarget.cpp +++ b/llvm/lib/Target/M68k/M68kSubtarget.cpp @@ -15,11 +15,12 @@ #include "GISel/M68kCallLowering.h" #include "GISel/M68kLegalizerInfo.h" #include "GISel/M68kRegisterBankInfo.h" + #include "M68k.h" #include "M68kMachineFunction.h" #include "M68kRegisterInfo.h" -#include "M68kSelectionDAGInfo.h" #include 
"M68kTargetMachine.h" + #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -49,12 +50,10 @@ void M68kSubtarget::anchor() {} M68kSubtarget::M68kSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const M68kTargetMachine &TM) - : M68kGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TM(TM), + : M68kGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TM(TM), TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, TT, FS, TM)), FrameLowering(*this, this->getStackAlignment()), TLInfo(TM, *this), TargetTriple(TT) { - TSInfo = std::make_unique(); - CallLoweringInfo.reset(new M68kCallLowering(*getTargetLowering())); Legalizer.reset(new M68kLegalizerInfo(*this)); @@ -63,12 +62,6 @@ M68kSubtarget::M68kSubtarget(const Triple &TT, StringRef CPU, StringRef FS, InstSelector.reset(createM68kInstructionSelector(TM, *this, *RBI)); } -M68kSubtarget::~M68kSubtarget() = default; - -const SelectionDAGTargetInfo *M68kSubtarget::getSelectionDAGInfo() const { - return TSInfo.get(); -} - const CallLowering *M68kSubtarget::getCallLowering() const { return CallLoweringInfo.get(); } diff --git a/llvm/lib/Target/M68k/M68kSubtarget.h b/llvm/lib/Target/M68k/M68kSubtarget.h index 16ca7d2e6d0fd..c08a9786fb27b 100644 --- a/llvm/lib/Target/M68k/M68kSubtarget.h +++ b/llvm/lib/Target/M68k/M68kSubtarget.h @@ -22,6 +22,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/RegisterBankInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" @@ -62,6 +63,7 @@ class M68kSubtarget : public M68kGenSubtargetInfo { const M68kTargetMachine &TM; + SelectionDAGTargetInfo TSInfo; M68kInstrInfo InstrInfo; M68kFrameLowering FrameLowering; M68kTargetLowering TLInfo; @@ -78,8 +80,6 @@ class M68kSubtarget : public M68kGenSubtargetInfo { M68kSubtarget(const 
Triple &TT, StringRef CPU, StringRef FS, const M68kTargetMachine &_TM); - ~M68kSubtarget() override; - /// Parses features string setting specified subtarget options. Definition /// of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); @@ -148,6 +148,10 @@ class M68kSubtarget : public M68kGenSubtargetInfo { StringRef FS, const M68kTargetMachine &TM); + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const M68kInstrInfo *getInstrInfo() const override { return &InstrInfo; } const M68kFrameLowering *getFrameLowering() const override { @@ -167,9 +171,6 @@ class M68kSubtarget : public M68kGenSubtargetInfo { } protected: - // SelectionDAGISel related APIs. - std::unique_ptr TSInfo; - // GlobalISel related APIs. std::unique_ptr CallLoweringInfo; std::unique_ptr InstSelector; @@ -177,7 +178,6 @@ class M68kSubtarget : public M68kGenSubtargetInfo { std::unique_ptr RegBankInfo; public: - const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; const CallLowering *getCallLowering() const override; InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h index caef8fe790adb..430fcd741c1b6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h +++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h @@ -90,14 +90,14 @@ class NVPTXExternalAAWrapper : public ExternalAAWrapperPass { public: static char ID; + bool runEarly() override { return true; } + NVPTXExternalAAWrapper() - : ExternalAAWrapperPass( - [](Pass &P, Function &, AAResults &AAR) { - if (auto *WrapperPass = - P.getAnalysisIfAvailable()) - AAR.addAAResult(WrapperPass->getResult()); - }, - /*RunEarly=*/true) {} + : ExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) { + if (auto *WrapperPass = + P.getAnalysisIfAvailable()) + 
AAR.addAAResult(WrapperPass->getResult()); + }) {} StringRef getPassName() const override { return "NVPTX Address space based Alias Analysis Wrapper"; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 2247ae3cf8f46..7d171cff7bcb4 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -2685,6 +2685,31 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkTensorReduceCommon(SDNode *N, ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); } +void NVPTXDAGToDAGISel::SelectCpAsyncBulkS2G(SDNode *N) { + // We have {Chain, Intrinsic-ID} followed by the actual intrisic args: + // dst, src, size, cache_hint, cache_hint_flag + // NumOperands = {Chain, IID} + {Actual intrinsic args} + // = {2} + {5} + size_t NumOps = N->getNumOperands(); + bool IsCacheHint = N->getConstantOperandVal(NumOps - 1) == 1; + size_t NumArgs = IsCacheHint ? 4 : 3; // src, dst, size, cache_hint + + SDLoc DL(N); + SmallVector Ops(N->ops().slice(2, NumArgs)); + Ops.push_back(N->getOperand(0)); // Chain operand + + bool IsShared32 = + CurDAG->getDataLayout().getPointerSizeInBits(ADDRESS_SPACE_SHARED) == 32; + unsigned Opcode; + if (IsCacheHint) + Opcode = IsShared32 ? NVPTX::CP_ASYNC_BULK_S2G_SHARED32_CH + : NVPTX::CP_ASYNC_BULK_S2G_CH; + else + Opcode = IsShared32 ? 
NVPTX::CP_ASYNC_BULK_S2G_SHARED32 + : NVPTX::CP_ASYNC_BULK_S2G; + ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops)); +} + void NVPTXDAGToDAGISel::SelectCpAsyncBulkG2S(SDNode *N) { // We have {Chain, Intrinsic-ID} followed by the actual intrisic args: // {dst, mbar, src, size, multicast, cache_hint, @@ -2867,6 +2892,9 @@ bool NVPTXDAGToDAGISel::tryIntrinsicVoid(SDNode *N) { case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster: SelectCpAsyncBulkG2S(N); return true; + case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_global: + SelectCpAsyncBulkS2G(N); + return true; case Intrinsic::nvvm_cp_async_bulk_prefetch_L2: SelectCpAsyncBulkPrefetchL2(N); return true; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 92efabc7e2068..23cbd458571a0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -93,6 +93,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { void SelectV2I64toI128(SDNode *N); void SelectI128toV2I64(SDNode *N); void SelectCpAsyncBulkG2S(SDNode *N); + void SelectCpAsyncBulkS2G(SDNode *N); void SelectCpAsyncBulkPrefetchL2(SDNode *N); void SelectCpAsyncBulkTensorG2SCommon(SDNode *N, bool IsIm2Col = false); void SelectCpAsyncBulkTensorS2GCommon(SDNode *N, bool IsIm2Col = false); diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 4f8a798295b42..d3cfce76c666e 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -511,11 +511,10 @@ def CP_ASYNC_BULK_WAIT_GROUP_READ : // TMA Async Bulk Copy Functions //------------------------------ -class CpAsyncBulkStr { +class CpAsyncBulkStr { // Shared to Global memory string S2G = "cp.async.bulk.global.shared::cta.bulk_group" - # !if(ch, ".L2::cache_hint", "") - # !if(mask, ".cp_mask", ""); + # !if(ch, ".L2::cache_hint", ""); // Global to Shared cluster memory string G2S 
= "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes" @@ -526,23 +525,18 @@ class CpAsyncBulkStr { string C2C = "cp.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes"; } -multiclass CP_ASYNC_BULK_S2G_INTR { - def NAME : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$size, Int64Regs:$ch), - !if(has_ch, - CpAsyncBulkStr<0, 1>.S2G # " [$dst], [$src], $size, $ch;", - CpAsyncBulkStr<0, 0>.S2G # " [$dst], [$src], $size;"), - [(int_nvvm_cp_async_bulk_shared_cta_to_global addr:$dst, addr:$src, i32:$size, i64:$ch, !if(has_ch, -1, 0))]>, - Requires<[hasPTX<80>, hasSM<90>]>; - - def NAME # _BM : NVPTXInst<(outs), (ins ADDR:$dst, ADDR:$src, Int32Regs:$size, Int64Regs:$ch, Int16Regs:$mask), - !if(has_ch, - CpAsyncBulkStr<0, 1, 1>.S2G # " [$dst], [$src], $size, $ch, $mask;", - CpAsyncBulkStr<0, 0, 1>.S2G # " [$dst], [$src], $size, $mask;"), - [(int_nvvm_cp_async_bulk_shared_cta_to_global_bytemask addr:$dst, addr:$src, i32:$size, i64:$ch, !if(has_ch, -1, 0), i16:$mask)]>, - Requires<[hasPTX<86>, hasSM<100>]>; +multiclass CP_ASYNC_BULK_S2G { + def NAME: NVPTXInst<(outs), + (ins Int64Regs:$dst, rc:$src, Int32Regs:$size), + !strconcat(CpAsyncBulkStr<0, 0>.S2G, " [$dst], [$src], $size;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; + def NAME # _CH: NVPTXInst<(outs), + (ins Int64Regs:$dst, rc:$src, Int32Regs:$size, Int64Regs:$ch), + !strconcat(CpAsyncBulkStr<0, 1>.S2G, " [$dst], [$src], $size, $ch;"), []>, + Requires<[hasPTX<80>, hasSM<90>]>; } -defm CP_ASYNC_BULK_S2G : CP_ASYNC_BULK_S2G_INTR<0>; -defm CP_ASYNC_BULK_S2G_CH : CP_ASYNC_BULK_S2G_INTR<1>; +defm CP_ASYNC_BULK_S2G : CP_ASYNC_BULK_S2G; +defm CP_ASYNC_BULK_S2G_SHARED32 : CP_ASYNC_BULK_S2G; multiclass CP_ASYNC_BULK_G2S { def NAME: NVPTXInst<(outs), diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index a1c9091c95b48..9bc4734815364 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ 
b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1272,11 +1272,6 @@ static MCRegister convertFPR64ToFPR32(MCRegister Reg) { return Reg - RISCV::F0_D + RISCV::F0_F; } -static MCRegister convertFPR64ToFPR128(MCRegister Reg) { - assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register"); - return Reg - RISCV::F0_D + RISCV::F0_Q; -} - static MCRegister convertVRToVRMx(const MCRegisterInfo &RI, MCRegister Reg, unsigned Kind) { unsigned RegClassID; @@ -1305,10 +1300,6 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(Reg); bool IsRegVR = RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg); - if (IsRegFPR64 && Kind == MCK_FPR128) { - Op.Reg.RegNum = convertFPR64ToFPR128(Reg); - return Match_Success; - } // As the parser couldn't differentiate an FPR32 from an FPR64, coerce the // register from FPR64 to FPR32 or FPR64C to FPR32C if necessary. if ((IsRegFPR64 && Kind == MCK_FPR32) || @@ -1672,16 +1663,13 @@ bool RISCVAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // rejected. MCRegister RISCVAsmParser::matchRegisterNameHelper(StringRef Name) const { MCRegister Reg = MatchRegisterName(Name); - // The 16-/32-/128- and 64-bit FPRs have the same asm name. Check - // that the initial match always matches the 64-bit variant, and - // not the 16/32/128-bit one. + // The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial + // match always matches the 64-bit variant, and not the 16/32-bit one. assert(!(Reg >= RISCV::F0_H && Reg <= RISCV::F31_H)); assert(!(Reg >= RISCV::F0_F && Reg <= RISCV::F31_F)); - assert(!(Reg >= RISCV::F0_Q && Reg <= RISCV::F31_Q)); // The default FPR register class is based on the tablegen enum ordering. 
static_assert(RISCV::F0_D < RISCV::F0_H, "FPR matching must be updated"); static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated"); - static_assert(RISCV::F0_D < RISCV::F0_Q, "FPR matching must be updated"); if (!Reg) Reg = MatchRegisterAltName(Name); if (isRVE() && Reg >= RISCV::X16 && Reg <= RISCV::X31) @@ -3860,9 +3848,6 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case RISCV::PseudoFLD: emitLoadStoreSymbol(Inst, RISCV::FLD, IDLoc, Out, /*HasTmpReg=*/true); return false; - case RISCV::PseudoFLQ: - emitLoadStoreSymbol(Inst, RISCV::FLQ, IDLoc, Out, /*HasTmpReg=*/true); - return false; case RISCV::PseudoSB: case RISCV::PseudoQC_E_SB: emitLoadStoreSymbol(Inst, RISCV::SB, IDLoc, Out, /*HasTmpReg=*/true); @@ -3890,9 +3875,6 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case RISCV::PseudoFSD: emitLoadStoreSymbol(Inst, RISCV::FSD, IDLoc, Out, /*HasTmpReg=*/true); return false; - case RISCV::PseudoFSQ: - emitLoadStoreSymbol(Inst, RISCV::FSQ, IDLoc, Out, /*HasTmpReg=*/true); - return false; case RISCV::PseudoAddTPRel: if (checkPseudoAddTPRel(Inst, Operands)) return true; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index a5e76668db6b9..ee8aa376f467d 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -177,17 +177,6 @@ static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo, - uint64_t Address, - const MCDisassembler *Decoder) { - if (RegNo >= 32) - return MCDisassembler::Fail; - - MCRegister Reg = RISCV::F0_Q + RegNo; - Inst.addOperand(MCOperand::createReg(Reg)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler 
*Decoder) { @@ -739,8 +728,7 @@ static constexpr FeatureBitset XTHeadGroup = { RISCV::FeatureVendorXTHeadVdot}; static constexpr FeatureBitset XAndesGroup = { - RISCV::FeatureVendorXAndesPerf, RISCV::FeatureVendorXAndesVPackFPH, - RISCV::FeatureVendorXAndesVDot}; + RISCV::FeatureVendorXAndesPerf, RISCV::FeatureVendorXAndesVPackFPH}; static constexpr DecoderListEntry DecoderList32[]{ // Vendor Extensions diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index b36d496137400..daae4e88a38e2 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -187,7 +187,6 @@ def FeatureStdExtZilsd def HasStdExtZilsd : Predicate<"Subtarget->hasStdExtZilsd()">, AssemblerPredicate<(all_of FeatureStdExtZilsd), "'Zilsd' (Load/Store pair instructions)">; -def NoHasStdExtZilsd : Predicate<"!Subtarget->hasStdExtZilsd()">; // Multiply Extensions @@ -292,13 +291,6 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">, AssemblerPredicate<(all_of FeatureStdExtD), "'D' (Double-Precision Floating-Point)">; -def FeatureStdExtQ - : RISCVExtension<2, 2, "Quad-Precision Floating-Point", [FeatureStdExtD]>, - RISCVExtensionBitmask<0, 16>; -def HasStdExtQ : Predicate<"Subtarget->hasStdExtQ()">, - AssemblerPredicate<(all_of FeatureStdExtQ), - "'Q' (Quad-Precision Floating-Point)">; - def FeatureStdExtZfhmin : RISCVExtension<1, 0, "Half-Precision Floating-Point Minimal", [FeatureStdExtF]>, @@ -1533,14 +1525,6 @@ def HasVendorXAndesVPackFPH AssemblerPredicate<(all_of FeatureVendorXAndesVPackFPH), "'XAndesVPackFPH' (Andes Vector Packed FP16 Extension)">; -def FeatureVendorXAndesVDot - : RISCVExtension<5, 0, "Andes Vector Dot Product Extension", - [FeatureStdExtZve32x]>; -def HasVendorXAndesVDot - : Predicate<"Subtarget->hasVendorXAndesVDot()">, - AssemblerPredicate<(all_of FeatureVendorXAndesVDot), - "'XAndesVDot' (Andes Vector Dot Product Extension)">; - 
//===----------------------------------------------------------------------===// // LLVM specific features and extensions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 18af1545d5a34..9db15ff25f979 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1626,51 +1626,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } break; } - case RISCVISD::LD_RV32: { - assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd"); - - SDValue Base, Offset; - SDValue Chain = Node->getOperand(0); - SDValue Addr = Node->getOperand(1); - SelectAddrRegImm(Addr, Base, Offset); - - SDValue Ops[] = {Base, Offset, Chain}; - MachineSDNode *New = CurDAG->getMachineNode( - RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops); - SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, - MVT::i32, SDValue(New, 0)); - SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, - MVT::i32, SDValue(New, 0)); - CurDAG->setNodeMemRefs(New, {cast(Node)->getMemOperand()}); - ReplaceUses(SDValue(Node, 0), Lo); - ReplaceUses(SDValue(Node, 1), Hi); - ReplaceUses(SDValue(Node, 2), SDValue(New, 1)); - CurDAG->RemoveDeadNode(Node); - return; - } - case RISCVISD::SD_RV32: { - SDValue Base, Offset; - SDValue Chain = Node->getOperand(0); - SDValue Addr = Node->getOperand(3); - SelectAddrRegImm(Addr, Base, Offset); - - SDValue Ops[] = { - CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), - Node->getOperand(1), - CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), - Node->getOperand(2), - CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)}; - - SDNode *RegPair = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, - MVT::Untyped, Ops); - MachineSDNode *New = - CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other, - {SDValue(RegPair, 0), Base, 
Offset, Chain}); - CurDAG->setNodeMemRefs(New, {cast(Node)->getMemOperand()}); - ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); - CurDAG->RemoveDeadNode(Node); - return; - } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { @@ -2576,7 +2531,8 @@ bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, - bool IsPrefetch = false) { + bool IsPrefetch = false, + bool IsRV32Zdinx = false) { if (!isa(Addr)) return false; @@ -2590,6 +2546,9 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, if (!Subtarget->is64Bit() || isInt<32>(Hi)) { if (IsPrefetch && (Lo12 & 0b11111) != 0) return false; + if (IsRV32Zdinx && !isInt<12>(Lo12 + 4)) + return false; + if (Hi) { int64_t Hi20 = (Hi >> 12) & 0xfffff; Base = SDValue( @@ -2613,6 +2572,8 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, Lo12 = Seq.back().getImm(); if (IsPrefetch && (Lo12 & 0b11111) != 0) return false; + if (IsRV32Zdinx && !isInt<12>(Lo12 + 4)) + return false; // Drop the last instruction. Seq.pop_back(); @@ -2704,7 +2665,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, } bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, - SDValue &Offset) { + SDValue &Offset, bool IsRV32Zdinx) { if (SelectAddrFrameIndex(Addr, Base, Offset)) return true; @@ -2712,14 +2673,39 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, MVT VT = Addr.getSimpleValueType(); if (Addr.getOpcode() == RISCVISD::ADD_LO) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; + // If this is non RV32Zdinx we can always fold. 
+ if (!IsRV32Zdinx) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + // For RV32Zdinx we need to have more than 4 byte alignment so we can add 4 + // to the offset when we expand in RISCVExpandPseudoInsts. + if (auto *GA = dyn_cast(Addr.getOperand(1))) { + const DataLayout &DL = CurDAG->getDataLayout(); + Align Alignment = commonAlignment( + GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); + if (Alignment > 4) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + } + if (auto *CP = dyn_cast(Addr.getOperand(1))) { + Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset()); + if (Alignment > 4) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + } } + int64_t RV32ZdinxRange = IsRV32Zdinx ? 4 : 0; if (CurDAG->isBaseWithConstantOffset(Addr)) { int64_t CVal = cast(Addr.getOperand(1))->getSExtValue(); - if (isInt<12>(CVal) && isInt<12>(CVal)) { + if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) { Base = Addr.getOperand(0); if (Base.getOpcode() == RISCVISD::ADD_LO) { SDValue LoOperand = Base.getOperand(1); @@ -2732,7 +2718,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, const DataLayout &DL = CurDAG->getDataLayout(); Align Alignment = commonAlignment( GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); - if ((CVal == 0 || Alignment > CVal)) { + if ((CVal == 0 || Alignment > CVal) && + (!IsRV32Zdinx || commonAlignment(Alignment, CVal) > 4)) { int64_t CombinedOffset = CVal + GA->getOffset(); Base = Base.getOperand(0); Offset = CurDAG->getTargetGlobalAddress( @@ -2753,13 +2740,13 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, // Handle ADD with large immediates. 
if (Addr.getOpcode() == ISD::ADD && isa(Addr.getOperand(1))) { int64_t CVal = cast(Addr.getOperand(1))->getSExtValue(); - assert(!(isInt<12>(CVal) && isInt<12>(CVal)) && + assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) && "simm12 not already handled?"); // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use // an ADDI for part of the offset and fold the rest into the load/store. // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. - if (CVal >= -4096 && CVal <= 4094) { + if (CVal >= -4096 && CVal <= (4094 - RV32ZdinxRange)) { int64_t Adj = CVal < 0 ? -2048 : 2047; Base = SDValue( CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), @@ -2777,7 +2764,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, // instructions. if (isWorthFoldingAdd(Addr) && selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, - Offset, /*IsPrefetch=*/false)) { + Offset, /*IsPrefetch=*/false, RV32ZdinxRange)) { // Insert an ADD instruction with the materialized Hi52 bits. 
Base = SDValue( CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), @@ -2787,7 +2774,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, } if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, - /*IsPrefetch=*/false)) + /*IsPrefetch=*/false, RV32ZdinxRange)) return true; Base = Addr; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 11d62e5edad3f..cd211d41f30fb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -46,7 +46,11 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { std::vector &OutOps) override; bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset); - bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset, + bool IsRV32Zdinx = false); + bool SelectAddrRegImmRV32Zdinx(SDValue Addr, SDValue &Base, SDValue &Offset) { + return SelectAddrRegImm(Addr, Base, Offset, true); + } bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c56877b9fcfe4..c01496c9a7f3a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -318,11 +318,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand); - if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) { - setOperationAction(ISD::LOAD, MVT::i64, Custom); - setOperationAction(ISD::STORE, MVT::i64, Custom); - } - if (Subtarget.is64Bit()) { setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); @@ -586,12 +581,6 @@ RISCVTargetLowering::RISCVTargetLowering(const 
TargetMachine &TM, if (!Subtarget.is64Bit()) setOperationAction(ISD::BITCAST, MVT::i64, Custom); - if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() && - !Subtarget.is64Bit()) { - setOperationAction(ISD::LOAD, MVT::f64, Custom); - setOperationAction(ISD::STORE, MVT::f64, Custom); - } - if (Subtarget.hasStdExtZfa()) { setOperationAction(ISD::ConstantFP, MVT::f64, Custom); setOperationAction(FPRndMode, MVT::f64, Legal); @@ -5332,13 +5321,15 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, [&]() { Operands.emplace_back(); }, [&](ArrayRef SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) { - Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX, - SmallVector(SrcSubMask)); + Operands.emplace_back().emplace_back( + SrcVecIdx, UINT_MAX, + SmallVector(SrcSubMask.begin(), SrcSubMask.end())); }, [&](ArrayRef SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) { if (NewReg) Operands.emplace_back(); - Operands.back().emplace_back(Idx1, Idx2, SmallVector(SrcSubMask)); + Operands.back().emplace_back( + Idx1, Idx2, SmallVector(SrcSubMask.begin(), SrcSubMask.end())); }); assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed"); // Note: check that we do not emit too many shuffles here to prevent code @@ -7714,42 +7705,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, } case ISD::LOAD: { auto *Load = cast(Op); - EVT VT = Load->getValueType(0); - if (VT == MVT::f64) { - assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() && - !Subtarget.is64Bit() && "Unexpected custom legalisation"); - - // Replace a double precision load with two i32 loads and a BuildPairF64. 
- SDLoc DL(Op); - SDValue BasePtr = Load->getBasePtr(); - SDValue Chain = Load->getChain(); - - SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, BasePtr, - Load->getPointerInfo(), Load->getOriginalAlign(), - Load->getMemOperand()->getFlags()); - BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4)); - SDValue Hi = DAG.getLoad( - MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4), - Load->getOriginalAlign(), Load->getMemOperand()->getFlags()); - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); - return DAG.getMergeValues({Pair, Chain}, DL); - } - + EVT VecTy = Load->getMemoryVT(); // Handle normal vector tuple load. - if (VT.isRISCVVectorTuple()) { + if (VecTy.isRISCVVectorTuple()) { SDLoc DL(Op); MVT XLenVT = Subtarget.getXLenVT(); - unsigned NF = VT.getRISCVVectorTupleNumFields(); - unsigned Sz = VT.getSizeInBits().getKnownMinValue(); + unsigned NF = VecTy.getRISCVVectorTupleNumFields(); + unsigned Sz = VecTy.getSizeInBits().getKnownMinValue(); unsigned NumElts = Sz / (NF * 8); int Log2LMUL = Log2_64(NumElts) - 3; auto Flag = SDNodeFlags(); Flag.setNoUnsignedWrap(true); - SDValue Ret = DAG.getUNDEF(VT); + SDValue Ret = DAG.getUNDEF(VecTy); SDValue BasePtr = Load->getBasePtr(); SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); VROffset = @@ -7763,7 +7731,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(), BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8)); OutChains.push_back(LoadVal.getValue(1)); - Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal, + Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal, DAG.getVectorIdxConstant(i, DL)); BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag); } @@ -7780,54 +7748,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case 
ISD::STORE: { auto *Store = cast(Op); SDValue StoredVal = Store->getValue(); - EVT VT = StoredVal.getValueType(); - if (VT == MVT::f64) { - assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() && - !Subtarget.is64Bit() && "Unexpected custom legalisation"); - - // Replace a double precision store with a SplitF64 and i32 stores. - SDValue DL(Op); - SDValue BasePtr = Store->getBasePtr(); - SDValue Chain = Store->getChain(); - SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL, - DAG.getVTList(MVT::i32, MVT::i32), StoredVal); - - SDValue Lo = DAG.getStore( - Chain, DL, Split.getValue(0), BasePtr, Store->getPointerInfo(), - Store->getOriginalAlign(), Store->getMemOperand()->getFlags()); - BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4)); - SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr, - Store->getPointerInfo().getWithOffset(4), - Store->getOriginalAlign(), - Store->getMemOperand()->getFlags()); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - } - if (VT == MVT::i64) { - assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() && - "Unexpected custom legalisation"); - if (Store->isTruncatingStore()) - return SDValue(); - - if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8) - return SDValue(); - - SDLoc DL(Op); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal, - DAG.getTargetConstant(0, DL, MVT::i32)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal, - DAG.getTargetConstant(1, DL, MVT::i32)); - - return DAG.getMemIntrinsicNode( - RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other), - {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64, - Store->getMemOperand()); - } + EVT VecTy = StoredVal.getValueType(); // Handle normal vector tuple store. 
- if (VT.isRISCVVectorTuple()) { + if (VecTy.isRISCVVectorTuple()) { SDLoc DL(Op); MVT XLenVT = Subtarget.getXLenVT(); - unsigned NF = VT.getRISCVVectorTupleNumFields(); - unsigned Sz = VT.getSizeInBits().getKnownMinValue(); + unsigned NF = VecTy.getRISCVVectorTupleNumFields(); + unsigned Sz = VecTy.getSizeInBits().getKnownMinValue(); unsigned NumElts = Sz / (NF * 8); int Log2LMUL = Log2_64(NumElts) - 3; @@ -13787,28 +13714,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, // sext_inreg we emit for ADD/SUB/MUL/SLLI. LoadSDNode *Ld = cast(N); - if (N->getValueType(0) == MVT::i64) { - assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() && - "Unexpected custom legalisation"); - - if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8) - return; - - SDLoc DL(N); - SDValue Result = DAG.getMemIntrinsicNode( - RISCVISD::LD_RV32, DL, - DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}), - {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand()); - SDValue Lo = Result.getValue(0); - SDValue Hi = Result.getValue(1); - SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); - Results.append({Pair, Result.getValue(2)}); - return; - } - - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - "Unexpected custom legalisation"); - SDLoc dl(N); SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(), Ld->getBasePtr(), Ld->getMemoryVT(), @@ -23479,8 +23384,8 @@ bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const { static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) { Module *M = IRB.GetInsertBlock()->getModule(); - Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration( - M, Intrinsic::thread_pointer, IRB.getPtrTy()); + Function *ThreadPointerFunc = + Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer); return IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc), Offset); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp 
b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 87e6248a38693..f181c1e137545 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -651,21 +651,29 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = MF->getFrameInfo(); unsigned Opcode; + bool IsScalableVector = true; if (RISCV::GPRRegClass.hasSubClassEq(RC)) { Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW : RISCV::SD; + IsScalableVector = false; } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::SH_INX; + IsScalableVector = false; } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::SW_INX; + IsScalableVector = false; } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxSD; + IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSH; + IsScalableVector = false; } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSW; + IsScalableVector = false; } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSD; + IsScalableVector = false; } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { Opcode = RISCV::VS1R_V; } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { @@ -699,7 +707,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, else llvm_unreachable("Can't store this register to stack slot"); - if (RISCVRegisterInfo::isRVVRegClass(RC)) { + if (IsScalableVector) { MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, TypeSize::getScalable(MFI.getObjectSize(FI)), MFI.getObjectAlign(FI)); @@ -735,21 +743,29 @@ void RISCVInstrInfo::loadRegFromStackSlot( Flags & MachineInstr::FrameDestroy ? MBB.findDebugLoc(I) : DebugLoc(); unsigned Opcode; + bool IsScalableVector = true; if (RISCV::GPRRegClass.hasSubClassEq(RC)) { Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
RISCV::LW : RISCV::LD; + IsScalableVector = false; } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::LH_INX; + IsScalableVector = false; } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::LW_INX; + IsScalableVector = false; } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxLD; + IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLH; + IsScalableVector = false; } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLW; + IsScalableVector = false; } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLD; + IsScalableVector = false; } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { Opcode = RISCV::VL1RE8_V; } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { @@ -783,7 +799,7 @@ void RISCVInstrInfo::loadRegFromStackSlot( else llvm_unreachable("Can't load this register from stack slot"); - if (RISCVRegisterInfo::isRVVRegClass(RC)) { + if (IsScalableVector) { MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, TypeSize::getScalable(MFI.getObjectSize(FI)), MFI.getObjectAlign(FI)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index aaeb4fd363f57..e9bdeb88e4ca8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -2210,17 +2210,9 @@ include "RISCVInstrInfoA.td" include "RISCVInstrInfoZa.td" include "RISCVInstrInfoZalasr.td" -// Integer -include "RISCVInstrInfoZimop.td" -include "RISCVInstrInfoZicbo.td" -include "RISCVInstrInfoZicond.td" -include "RISCVInstrInfoZicfiss.td" -include "RISCVInstrInfoZilsd.td" - // Scalar FP include "RISCVInstrInfoF.td" include "RISCVInstrInfoD.td" -include "RISCVInstrInfoQ.td" include "RISCVInstrInfoZfh.td" include "RISCVInstrInfoZfbfmin.td" include "RISCVInstrInfoZfa.td" @@ -2234,6 +2226,13 @@ include "RISCVInstrInfoV.td" include 
"RISCVInstrInfoZvk.td" include "RISCVInstrInfoZvqdotq.td" +// Integer +include "RISCVInstrInfoZimop.td" +include "RISCVInstrInfoZicbo.td" +include "RISCVInstrInfoZicond.td" +include "RISCVInstrInfoZicfiss.td" +include "RISCVInstrInfoZilsd.td" + // Compressed include "RISCVInstrInfoC.td" include "RISCVInstrInfoZc.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 414e093510607..0c584daf45b14 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -27,6 +27,8 @@ def : GINodeEquiv; def RISCVSplitF64 : RVSDNode<"SplitF64", SDT_RISCVSplitF64>; def : GINodeEquiv; +def AddrRegImmINX : ComplexPattern; + //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// @@ -527,19 +529,18 @@ defm Select_FPR64IN32X : SelectCC_GPR_rrirr; def PseudoFROUND_D_IN32X : PseudoFROUND; /// Loads -let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in +let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>; +def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))), + (PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>; /// Stores -let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in +let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>; +def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)), + (PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>; } // Predicates = [HasStdExtZdinx, IsRV32] -let Predicates = [HasStdExtZdinx, HasStdExtZilsd, IsRV32] in { -def : LdPat; -def : StPat; -} - let Predicates = [HasStdExtD, IsRV32] in 
{ // double->[u]int. Round-to-zero must be used. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 84a75666e5f36..360191f03ddf7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -121,7 +121,7 @@ def FPR32INX : RegisterOperand { let ParserMatchClass = GPRAsFPR32; } -// Describes a combination of predicates from F/D/Q/Zfh/Zfhmin or +// Describes a combination of predicates from F/D/Zfh/Zfhmin or // Zfinx/Zdinx/Zhinx/Zhinxmin that are applied to scalar FP instruction. // Contains the DAGOperand for the primary type for the predicates. The primary // type may be unset for combinations of predicates like Zfh+D. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td b/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td deleted file mode 100644 index da78c13c0edcc..0000000000000 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoQ.td +++ /dev/null @@ -1,167 +0,0 @@ -//===-- RISCVInstrInfoQ.td - RISC-V 'Q' instructions -------*- tablegen -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file describes the RISC-V instructions from the standard 'Q', -// Quad-Precision Floating-Point instruction set extension. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Operand and SDNode transformation definitions. 
-//===----------------------------------------------------------------------===// - -def QExt : ExtInfo<"", "", [HasStdExtQ], f128, FPR128, FPR32, FPR64, ?>; - -defvar QExts = [QExt]; -defvar QExtsRV64 = [QExt]; - -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -let Predicates = [HasStdExtQ] in { - def FLQ : FPLoad_r<0b100, "flq", FPR128, WriteFLD128>; - - // Operands for stores are in the order srcreg, base, offset rather than - // reflecting the order these fields are specified in the instruction - // encoding. - def FSQ : FPStore_r<0b100, "fsq", FPR128, WriteFST128>; -} // Predicates = [HasStdExtQ] - -foreach Ext = QExts in { - let SchedRW = [WriteFMA128, ReadFMA128, ReadFMA128, ReadFMA128Addend] in { - defm FMADD_Q : FPFMA_rrr_frm_m; - defm FMSUB_Q : FPFMA_rrr_frm_m; - defm FNMSUB_Q : FPFMA_rrr_frm_m; - defm FNMADD_Q : FPFMA_rrr_frm_m; - } - - let SchedRW = [WriteFAdd128, ReadFAdd128, ReadFAdd128] in { - defm FADD_Q : FPALU_rr_frm_m<0b0000011, "fadd.q", Ext>; - defm FSUB_Q : FPALU_rr_frm_m<0b0000111, "fsub.q", Ext>; - } - - let SchedRW = [WriteFMul128, ReadFMul128, ReadFMul128] in - defm FMUL_Q : FPALU_rr_frm_m<0b0001011, "fmul.q", Ext>; - - let SchedRW = [WriteFDiv128, ReadFDiv128, ReadFDiv128] in - defm FDIV_Q : FPALU_rr_frm_m<0b0001111, "fdiv.q", Ext>; - - defm FSQRT_Q : FPUnaryOp_r_frm_m<0b0101111, 0b00000, Ext, Ext.PrimaryTy, - Ext.PrimaryTy, "fsqrt.q">, - Sched<[WriteFSqrt128, ReadFSqrt128]>; - - let SchedRW = [WriteFSGNJ128, ReadFSGNJ128, ReadFSGNJ128], - mayRaiseFPException = 0 in { - defm FSGNJ_Q : FPALU_rr_m<0b0010011, 0b000, "fsgnj.q", Ext>; - defm FSGNJN_Q : FPALU_rr_m<0b0010011, 0b001, "fsgnjn.q", Ext>; - defm FSGNJX_Q : FPALU_rr_m<0b0010011, 0b010, "fsgnjx.q", Ext>; - } - - let SchedRW = [WriteFMinMax128, ReadFMinMax128, ReadFMinMax128] in { - defm FMIN_Q : FPALU_rr_m<0b0010111, 0b000, "fmin.q", Ext, Commutable = 
1>; - defm FMAX_Q : FPALU_rr_m<0b0010111, 0b001, "fmax.q", Ext, Commutable = 1>; - } - - defm FCVT_S_Q : FPUnaryOp_r_frm_m<0b0100000, 0b00011, Ext, Ext.F32Ty, - Ext.PrimaryTy, "fcvt.s.q">, - Sched<[WriteFCvtF128ToF32, ReadFCvtF128ToF32]>; - - defm FCVT_Q_S : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b00000, Ext, - Ext.PrimaryTy, Ext.F32Ty, - "fcvt.q.s">, - Sched<[WriteFCvtF32ToF128, ReadFCvtF32ToF128]>; - - defm FCVT_D_Q : FPUnaryOp_r_frm_m<0b0100001, 0b00011, Ext, Ext.F64Ty, - Ext.PrimaryTy, "fcvt.d.q">, - Sched<[WriteFCvtF128ToF64, ReadFCvtF128ToF64]>; - - defm FCVT_Q_D : FPUnaryOp_r_frmlegacy_m<0b0100011, 0b00001, Ext, - Ext.PrimaryTy, Ext.F64Ty, - "fcvt.q.d">, - Sched<[WriteFCvtF64ToF128, ReadFCvtF64ToF128]>; - - let SchedRW = [WriteFCmp128, ReadFCmp128, ReadFCmp128] in { - defm FEQ_Q : FPCmp_rr_m<0b1010011, 0b010, "feq.q", Ext, Commutable = 1>; - defm FLT_Q : FPCmp_rr_m<0b1010011, 0b001, "flt.q", Ext>; - defm FLE_Q : FPCmp_rr_m<0b1010011, 0b000, "fle.q", Ext>; - } - - let mayRaiseFPException = 0 in - defm FCLASS_Q : FPUnaryOp_r_m<0b1110011, 0b00000, 0b001, Ext, GPR, - Ext.PrimaryTy, "fclass.q">, - Sched<[WriteFClass128, ReadFClass128]>; - - let IsSignExtendingOpW = 1 in - defm FCVT_W_Q : FPUnaryOp_r_frm_m<0b1100011, 0b00000, Ext, GPR, - Ext.PrimaryTy, "fcvt.w.q">, - Sched<[WriteFCvtF128ToI32, ReadFCvtF128ToI32]>; - - let IsSignExtendingOpW = 1 in - defm FCVT_WU_Q : FPUnaryOp_r_frm_m<0b1100011, 0b00001, Ext, GPR, - Ext.PrimaryTy, "fcvt.wu.q">, - Sched<[WriteFCvtF128ToI32, ReadFCvtF128ToI32]>; - - let mayRaiseFPException = 0 in - defm FCVT_Q_W : FPUnaryOp_r_frmlegacy_m<0b1101011, 0b00000, Ext, - Ext.PrimaryTy, GPR, "fcvt.q.w">, - Sched<[WriteFCvtI32ToF128, ReadFCvtI32ToF128]>; - - let mayRaiseFPException = 0 in - defm FCVT_Q_WU : FPUnaryOp_r_frmlegacy_m<0b1101011, 0b00001, Ext, - Ext.PrimaryTy, GPR, "fcvt.q.wu">, - Sched<[WriteFCvtI32ToF128, ReadFCvtI32ToF128]>; -} // foreach Ext = QExts - -foreach Ext = QExtsRV64 in { - defm FCVT_L_Q : FPUnaryOp_r_frm_m<0b1100011, 
0b00010, Ext, GPR, - Ext.PrimaryTy, "fcvt.l.q", [IsRV64]>, - Sched<[WriteFCvtF128ToI64, ReadFCvtF128ToI64]>; - - defm FCVT_LU_Q : FPUnaryOp_r_frm_m<0b1100011, 0b00011, Ext, GPR, - Ext.PrimaryTy, "fcvt.lu.q", [IsRV64]>, - Sched<[WriteFCvtF128ToI64, ReadFCvtF128ToI64]>; - - let mayRaiseFPException = 0 in - defm FCVT_Q_L : FPUnaryOp_r_frmlegacy_m<0b1101011, 0b00010, Ext, - Ext.PrimaryTy, GPR, "fcvt.q.l", - [IsRV64]>, - Sched<[WriteFCvtI64ToF128, ReadFCvtI64ToF128]>; - - let mayRaiseFPException = 0 in - defm FCVT_Q_LU : FPUnaryOp_r_frmlegacy_m<0b1101011, 0b00011, Ext, - Ext.PrimaryTy, GPR, "fcvt.q.lu", - [IsRV64]>, - Sched<[WriteFCvtI64ToF128, ReadFCvtI64ToF128]>; -} // foreach Ext = QExtsRV64 - -//===----------------------------------------------------------------------===// -// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) -//===----------------------------------------------------------------------===// - -let Predicates = [HasStdExtQ] in { - def : InstAlias<"flq $rd, (${rs1})", (FLQ FPR128:$rd, GPR:$rs1, 0), 0>; - def : InstAlias<"fsq $rs2, (${rs1})", (FSQ FPR128:$rs2, GPR:$rs1, 0), 0>; - - def : InstAlias<"fmv.q $rd, $rs", (FSGNJ_Q FPR128:$rd, FPR128:$rs, - FPR128:$rs)>; - def : InstAlias<"fabs.q $rd, $rs", (FSGNJX_Q FPR128:$rd, FPR128:$rs, - FPR128:$rs)>; - def : InstAlias<"fneg.q $rd, $rs", (FSGNJN_Q FPR128:$rd, FPR128:$rs, - FPR128:$rs)>; - - // fgt.q/fge.q are recognised by the GNU assembler but the canonical - // flt.q/fle.q forms will always be printed. Therefore, set a zero weight. 
- def : InstAlias<"fgt.q $rd, $rs, $rt", - (FLT_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; - def : InstAlias<"fge.q $rd, $rs, $rt", - (FLE_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; - - def PseudoFLQ : PseudoFloatLoad<"flq", FPR128>; - def PseudoFSQ : PseudoStore<"fsq", FPR128>; -} // Predicates = [HasStdExtQ] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td index 6afe88b805d35..aa70a9d03cc1f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td @@ -338,56 +338,6 @@ class NDSRVInstVFPMAD funct6, string opcodestr> let RVVConstraint = VMConstraint; } -class NDSRVInstVD4DOT funct6, string opcodestr> - : RVInst<(outs VR:$vd), (ins VR:$vs1, VR:$vs2, VMaskOp:$vm), - opcodestr # "." # "vv", "$vd, $vs1, $vs2$vm", [], InstFormatR>, - SchedBinaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV"> { - bits<5> vs2; - bits<5> vs1; - bits<5> vd; - bit vm; - - let Inst{31-26} = funct6; - let Inst{25} = vm; - let Inst{24-20} = vs2; - let Inst{19-15} = vs1; - let Inst{14-12} = 0b100; - let Inst{11-7} = vd; - let Inst{6-0} = OPC_CUSTOM_2.Value; - let hasSideEffects = 0; - let mayLoad = 0; - let mayStore = 0; - - let RVVConstraint = VMConstraint; -} - -//===----------------------------------------------------------------------===// -// Multiclass -//===----------------------------------------------------------------------===// - -let fprclass = !cast("FPR32") in -def SCALAR_F16_FPR32 : FPR_Info<16>; - -let hasSideEffects = 0 in -multiclass VPseudoVFPMAD_VF_RM { - foreach m = SCALAR_F16_FPR32.MxList in { - defm "" : VPseudoBinaryV_VF_RM, - SchedBinary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF", - m.MX, SCALAR_F16_FPR32.SEW, forcePassthruRead=true>; - } -} - -multiclass VPatVFPMADBinaryV_VX_RM vtilist> { - foreach vti = vtilist in { - defvar kind = "V"#vti.ScalarSuffix; - defm : VPatBinaryRoundingMode; - } -} - 
//===----------------------------------------------------------------------===// // XAndesPerf //===----------------------------------------------------------------------===// @@ -448,21 +398,9 @@ let Predicates = [HasVendorXAndesVPackFPH], def NDS_VFPMADT_VF : NDSRVInstVFPMAD<0b000010, "nds.vfpmadt">; def NDS_VFPMADB_VF : NDSRVInstVFPMAD<0b000011, "nds.vfpmadb">; } - -//===----------------------------------------------------------------------===// -// XAndesVDot -//===----------------------------------------------------------------------===// - -let Predicates = [HasVendorXAndesVDot], Uses = [VL, VTYPE] in { -def NDS_VD4DOTS_VV : NDSRVInstVD4DOT<0b000100, "nds.vd4dots">; -def NDS_VD4DOTU_VV : NDSRVInstVD4DOT<0b000111, "nds.vd4dotu">; -def NDS_VD4DOTSU_VV : NDSRVInstVD4DOT<0b000101, "nds.vd4dotsu">; -} } // DecoderNamespace = "XAndes" -//===----------------------------------------------------------------------===// -// Pseudo-instructions and codegen patterns -//===----------------------------------------------------------------------===// +// Patterns let Predicates = [HasVendorXAndesPerf] in { @@ -490,12 +428,3 @@ def : Sh1AddPat; def : Sh2AddPat; def : Sh3AddPat; } // Predicates = [HasVendorXAndesPerf, IsRV64] - -let Predicates = [HasVendorXAndesVPackFPH], - mayRaiseFPException = true in { -defm PseudoNDS_VFPMADT : VPseudoVFPMAD_VF_RM; -defm PseudoNDS_VFPMADB : VPseudoVFPMAD_VF_RM; -} // Predicates = [HasVendorXAndesVPackFPH] - -defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadt", "PseudoNDS_VFPMADT", AllFP16Vectors>; -defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadb", "PseudoNDS_VFPMADB", AllFP16Vectors>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 0ad654db42f5c..8a449d32e0104 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -175,28 +175,6 @@ def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>; } } // Predicates = [HasStdExtZfa, 
HasStdExtZfh] -let Predicates = [HasStdExtZfa, HasStdExtQ] in { -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def FLI_Q : FPFLI_r<0b1111011, 0b00001, 0b000, FPR128, "fli.q">; - -def FMINM_Q: FPALU_rr<0b0010111, 0b010, "fminm.q", FPR128, Commutable=1>; -def FMAXM_Q: FPALU_rr<0b0010111, 0b011, "fmaxm.q", FPR128, Commutable=1>; - -def FROUND_Q : FPUnaryOp_r_frm<0b0100011, 0b00100, FPR128, FPR128, "fround.q">; -def FROUNDNX_Q : FPUnaryOp_r_frm<0b0100011, 0b00101, FPR128, FPR128, - "froundnx.q">; - -def FLTQ_Q : FPCmp_rr<0b1010011, 0b101, "fltq.q", FPR128>; -def FLEQ_Q : FPCmp_rr<0b1010011, 0b100, "fleq.q", FPR128>; -} // Predicates = [HasStdExtZfa, HasStdExtQ] - -let Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] in { - let mayRaiseFPException = 0 in { - def FMVH_X_Q : FPUnaryOp_r<0b1110011, 0b00001, 0b000, GPR, FPR128, "fmvh.x.q">; - def FMVP_Q_X : FPBinaryOp_rr<0b1011011, 0b000, FPR128, GPR, "fmvp.q.x">; - } -} // Predicates = [HasStdExtZfa, HasStdExtQ, IsRV64] - //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// @@ -222,13 +200,6 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; } -let Predicates = [HasStdExtZfa, HasStdExtQ] in { -def : InstAlias<"fgtq.q $rd, $rs, $rt", - (FLTQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; -def : InstAlias<"fgeq.q $rd, $rs, $rt", - (FLEQ_Q GPR:$rd, FPR128:$rt, FPR128:$rs), 0>; -} - //===----------------------------------------------------------------------===// // Codegen patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td index e44bdcb4e2f0f..56c870414596b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td @@ -37,7 +37,7 @@ class CBO_r optype, 
string opcodestr> let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in class Prefetch_ri optype, string opcodestr> - : RVInstS<0b110, OPC_OP_IMM, (outs), (ins GPRMem:$rs1, simm12_lsb00000:$imm12), + : RVInstS<0b110, OPC_OP_IMM, (outs), (ins GPR:$rs1, simm12_lsb00000:$imm12), opcodestr, "${imm12}(${rs1})"> { let Inst{11-7} = 0b00000; let rs2 = optype; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td index a3203f288b545..3e526273c0768 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td @@ -11,20 +11,6 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// RISC-V specific DAG Nodes. -//===----------------------------------------------------------------------===// - -def SDT_RISCV_LD_RV32 - : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>; -def SDT_RISCV_SD_RV32 - : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<2>]>; - -def riscv_ld_rv32 : RVSDNode<"LD_RV32", SDT_RISCV_LD_RV32, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def riscv_st_rv32 : RVSDNode<"SD_RV32", SDT_RISCV_SD_RV32, - [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 65a6f54d6c8aa..c6f6c9007b2b1 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -48,9 +48,6 @@ static_assert(RISCV::F31_F == RISCV::F0_F + 31, static_assert(RISCV::F1_D == RISCV::F0_D + 1, "Register list not consecutive"); static_assert(RISCV::F31_D == RISCV::F0_D + 31, "Register list not consecutive"); 
-static_assert(RISCV::F1_Q == RISCV::F0_Q + 1, "Register list not consecutive"); -static_assert(RISCV::F31_Q == RISCV::F0_Q + 31, - "Register list not consecutive"); static_assert(RISCV::V1 == RISCV::V0 + 1, "Register list not consecutive"); static_assert(RISCV::V31 == RISCV::V0 + 31, "Register list not consecutive"); diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index cd725ca6166e2..eb3d5e553f1ef 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -45,13 +45,6 @@ class RISCVReg64 let SubRegIndices = [sub_32]; } -def sub_64 : SubRegIndex<64>; -class RISCVReg128 - : RISCVRegWithSubRegs { - let SubRegIndices = [sub_64]; -} - let FallbackRegAltNameIndex = NoRegAltName in def ABIRegAltName : RegAltNameIndex; @@ -419,11 +412,6 @@ let RegAltNameIndices = [ABIRegAltName] in { def F#Index#_D : RISCVReg64("F"#Index#"_F")>, DwarfRegAlias("F"#Index#"_H")>; } - - foreach Index = 0-31 in { - def F#Index#_Q : RISCVReg128("F"#Index#"_D")>, - DwarfRegAlias("F"#Index#"_H")>; - } } // The order of registers represents the preferred allocation sequence, @@ -474,15 +462,6 @@ def FPR64C : RISCVRegisterClass<[f64], 64, (add (sequence "F%u_D", 8, 9) )>; -def FPR128 : RISCVRegisterClass<[f128], 128, (add - (sequence "F%u_Q", 15, 10), - (sequence "F%u_Q", 0, 7), - (sequence "F%u_Q", 16, 17), - (sequence "F%u_Q", 28, 31), - (sequence "F%u_Q", 8, 9), - (sequence "F%u_Q", 18, 27) -)>; - //===----------------------------------------------------------------------===// // GPR Classes for "H/F/D in X" //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td index 248d2273ef2f4..be9c4ddf7cf48 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td +++ b/llvm/lib/Target/RISCV/RISCVSchedGenericOOO.td @@ -492,7 +492,6 @@ def : ReadAdvance; 
//===----------------------------------------------------------------------===// // Unsupported extensions //===----------------------------------------------------------------------===// -defm : UnsupportedSchedQ; defm : UnsupportedSchedV; defm : UnsupportedSchedZvk; defm : UnsupportedSchedSFB; diff --git a/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td index 8ba4cd0acdd6c..a1127966e8417 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td +++ b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td @@ -263,7 +263,6 @@ def : ReadAdvance; def : ReadAdvance; // Unsupported extensions. -defm : UnsupportedSchedQ; defm : UnsupportedSchedV; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbs; diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td index 4c4654ba2fc0f..1148581415380 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td @@ -250,7 +250,6 @@ def : ReadAdvance; //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedV; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZba; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index af64a871a9292..f4d2073d3b52d 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -1300,7 +1300,6 @@ foreach mx = SchedMxList in { //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index 370ea64699383..1ac05c9444725 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ 
b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -1231,7 +1231,6 @@ defm "" : LMULReadAdvance<"ReadVSM3MEV", 0>; //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td index 5933d73174f79..ca116e0c54f3f 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP500.td @@ -348,7 +348,6 @@ def : ReadAdvance; //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedV; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbc; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td index 7c04d1c54473d..2bfd5ef811c7b 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td @@ -1487,7 +1487,6 @@ defm "" : LMULReadAdvance<"ReadVSM3MEV", 0>; //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedZabha; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbkb; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td index 8948694c420a0..c21ab969d12ac 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td @@ -342,7 +342,6 @@ def : ReadAdvance; //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedV; defm : UnsupportedSchedXsfvcp; defm : UnsupportedSchedZabha; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR345.td 
b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR345.td index 815c2da992a11..e509abc9f922e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR345.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR345.td @@ -199,7 +199,6 @@ multiclass SCR3_Unsupported : multiclass SCR4_SCR5_Unsupported : SCR_Unsupported, - UnsupportedSchedQ, UnsupportedSchedZfhmin; // Bypasses (none) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR7.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR7.td index decd578360753..4631474a945cb 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR7.td @@ -241,7 +241,6 @@ multiclass SCR7_Other { // Unsupported scheduling classes for SCR7. multiclass SCR7_Unsupported { - defm : UnsupportedSchedQ; defm : UnsupportedSchedSFB; defm : UnsupportedSchedV; defm : UnsupportedSchedXsfvcp; diff --git a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td index 5322de100d0ad..2afe02552974e 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td +++ b/llvm/lib/Target/RISCV/RISCVSchedTTAscalonD8.td @@ -318,7 +318,6 @@ def : ReadAdvance; //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedV; defm : UnsupportedSchedXsfvcp; defm : UnsupportedSchedZabha; diff --git a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td index 3076a2ebb813d..16d192feafd29 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td +++ b/llvm/lib/Target/RISCV/RISCVSchedXiangShanNanHu.td @@ -306,7 +306,6 @@ def : ReadAdvance; //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedQ; defm : UnsupportedSchedV; defm : UnsupportedSchedZfa; defm : UnsupportedSchedZfhmin; diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td 
b/llvm/lib/Target/RISCV/RISCVSchedule.td index c8b0f0c9325f7..f5c17d85f629d 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -43,33 +43,26 @@ def WriteAtomicSTD : SchedWrite; // Atomic store double word def WriteFAdd16 : SchedWrite; // 16-bit floating point addition/subtraction def WriteFAdd32 : SchedWrite; // 32-bit floating point addition/subtraction def WriteFAdd64 : SchedWrite; // 64-bit floating point addition/subtraction -def WriteFAdd128 : SchedWrite; // 128-bit floating point addition/subtraction def WriteFMul16 : SchedWrite; // 16-bit floating point multiply def WriteFMul32 : SchedWrite; // 32-bit floating point multiply def WriteFMul64 : SchedWrite; // 64-bit floating point multiply -def WriteFMul128 : SchedWrite; // 128-bit floating point multiply def WriteFMA16 : SchedWrite; // 16-bit floating point fused multiply-add def WriteFMA32 : SchedWrite; // 32-bit floating point fused multiply-add def WriteFMA64 : SchedWrite; // 64-bit floating point fused multiply-add -def WriteFMA128 : SchedWrite; // 128-bit floating point fused multiply-add def WriteFDiv16 : SchedWrite; // 16-bit floating point divide def WriteFDiv32 : SchedWrite; // 32-bit floating point divide def WriteFDiv64 : SchedWrite; // 64-bit floating point divide -def WriteFDiv128 : SchedWrite; // 128-bit floating point divide def WriteFSqrt16 : SchedWrite; // 16-bit floating point sqrt def WriteFSqrt32 : SchedWrite; // 32-bit floating point sqrt def WriteFSqrt64 : SchedWrite; // 64-bit floating point sqrt -def WriteFSqrt128 : SchedWrite; // 128-bit floating point sqrt // Integer to float conversions def WriteFCvtI32ToF16 : SchedWrite; def WriteFCvtI32ToF32 : SchedWrite; def WriteFCvtI32ToF64 : SchedWrite; -def WriteFCvtI32ToF128 : SchedWrite; def WriteFCvtI64ToF16 : SchedWrite; // RV64I only def WriteFCvtI64ToF32 : SchedWrite; // RV64I only def WriteFCvtI64ToF64 : SchedWrite; // RV64I only -def WriteFCvtI64ToF128 : SchedWrite; // RV64I only // 
Float to integer conversions def WriteFCvtF16ToI32 : SchedWrite; @@ -78,8 +71,6 @@ def WriteFCvtF32ToI32 : SchedWrite; def WriteFCvtF32ToI64 : SchedWrite; // RV64I only def WriteFCvtF64ToI32 : SchedWrite; def WriteFCvtF64ToI64 : SchedWrite; // RV64I only -def WriteFCvtF128ToI32 : SchedWrite; -def WriteFCvtF128ToI64 : SchedWrite; // RV64I only // Float to float conversions def WriteFCvtF32ToF64 : SchedWrite; @@ -88,10 +79,6 @@ def WriteFCvtF16ToF32 : SchedWrite; def WriteFCvtF32ToF16 : SchedWrite; def WriteFCvtF16ToF64 : SchedWrite; def WriteFCvtF64ToF16 : SchedWrite; -def WriteFCvtF128ToF32 : SchedWrite; -def WriteFCvtF128ToF64 : SchedWrite; -def WriteFCvtF32ToF128 : SchedWrite; -def WriteFCvtF64ToF128 : SchedWrite; // Zfa fround instructions. def WriteFRoundF32 : SchedWrite; @@ -101,19 +88,15 @@ def WriteFRoundF16 : SchedWrite; def WriteFClass16 : SchedWrite; // 16-bit floating point classify def WriteFClass32 : SchedWrite; // 32-bit floating point classify def WriteFClass64 : SchedWrite; // 64-bit floating point classify -def WriteFClass128 : SchedWrite; // 128-bit floating point classify def WriteFCmp16 : SchedWrite; // 16-bit floating point compare def WriteFCmp32 : SchedWrite; // 32-bit floating point compare def WriteFCmp64 : SchedWrite; // 64-bit floating point compare -def WriteFCmp128 : SchedWrite; // 128-bit floating point compare def WriteFSGNJ16 : SchedWrite; // 16-bit floating point sign-injection def WriteFSGNJ32 : SchedWrite; // 32-bit floating point sign-injection def WriteFSGNJ64 : SchedWrite; // 64-bit floating point sign-injection -def WriteFSGNJ128 : SchedWrite; // 128-bit floating point sign-injection def WriteFMinMax16 : SchedWrite; // 16-bit floating point min or max def WriteFMinMax32 : SchedWrite; // 32-bit floating point min or max def WriteFMinMax64 : SchedWrite; // 64-bit floating point min or max -def WriteFMinMax128 : SchedWrite; // 128-bit floating point min or max def WriteFMovF16ToI16 : SchedWrite; def WriteFMovI16ToF16 : 
SchedWrite; @@ -129,11 +112,9 @@ def WriteFLI64 : SchedWrite; // Floating point constant load def WriteFLD16 : SchedWrite; // Floating point sp load def WriteFLD32 : SchedWrite; // Floating point sp load def WriteFLD64 : SchedWrite; // Floating point dp load -def WriteFLD128 : SchedWrite; // Floating point qp load def WriteFST16 : SchedWrite; // Floating point sp store def WriteFST32 : SchedWrite; // Floating point sp store def WriteFST64 : SchedWrite; // Floating point dp store -def WriteFST128 : SchedWrite; // Floating point qp store // short forward branch for Bullet def WriteSFB : SchedWrite; @@ -175,55 +156,42 @@ def ReadAtomicSTD : SchedRead; // Atomic store double word def ReadFAdd16 : SchedRead; // 16-bit floating point addition/subtraction def ReadFAdd32 : SchedRead; // 32-bit floating point addition/subtraction def ReadFAdd64 : SchedRead; // 64-bit floating point addition/subtraction -def ReadFAdd128 : SchedRead; // 128-bit floating point addition/subtraction def ReadFMul16 : SchedRead; // 16-bit floating point multiply def ReadFMul32 : SchedRead; // 32-bit floating point multiply def ReadFMul64 : SchedRead; // 64-bit floating point multiply -def ReadFMul128 : SchedRead; // 128-bit floating point multiply def ReadFMA16 : SchedRead; // 16-bit floating point fused multiply-add def ReadFMA16Addend : SchedRead; // 16-bit floating point fused multiply-add (addend) def ReadFMA32 : SchedRead; // 32-bit floating point fused multiply-add def ReadFMA32Addend : SchedRead; // 32-bit floating point fused multiply-add (addend) def ReadFMA64 : SchedRead; // 64-bit floating point fused multiply-add def ReadFMA64Addend : SchedRead; // 64-bit floating point fused multiply-add (addend) -def ReadFMA128 : SchedRead; // 128-bit floating point fused multiply-add -def ReadFMA128Addend: SchedRead; // 128-bit floating point fused multiply-add (addend) def ReadFDiv16 : SchedRead; // 16-bit floating point divide def ReadFDiv32 : SchedRead; // 32-bit floating point divide def 
ReadFDiv64 : SchedRead; // 64-bit floating point divide -def ReadFDiv128 : SchedRead; // 128-bit floating point divide def ReadFSqrt16 : SchedRead; // 16-bit floating point sqrt def ReadFSqrt32 : SchedRead; // 32-bit floating point sqrt def ReadFSqrt64 : SchedRead; // 64-bit floating point sqrt -def ReadFSqrt128 : SchedRead; // 128-bit floating point sqrt def ReadFCmp16 : SchedRead; def ReadFCmp32 : SchedRead; def ReadFCmp64 : SchedRead; -def ReadFCmp128 : SchedRead; def ReadFSGNJ16 : SchedRead; def ReadFSGNJ32 : SchedRead; def ReadFSGNJ64 : SchedRead; -def ReadFSGNJ128 : SchedRead; def ReadFMinMax16 : SchedRead; def ReadFMinMax32 : SchedRead; def ReadFMinMax64 : SchedRead; -def ReadFMinMax128 : SchedRead; def ReadFCvtF16ToI32 : SchedRead; def ReadFCvtF16ToI64 : SchedRead; def ReadFCvtF32ToI32 : SchedRead; def ReadFCvtF32ToI64 : SchedRead; def ReadFCvtF64ToI32 : SchedRead; def ReadFCvtF64ToI64 : SchedRead; -def ReadFCvtF128ToI32 : SchedRead; -def ReadFCvtF128ToI64 : SchedRead; def ReadFCvtI32ToF16 : SchedRead; def ReadFCvtI32ToF32 : SchedRead; def ReadFCvtI32ToF64 : SchedRead; -def ReadFCvtI32ToF128 : SchedRead; def ReadFCvtI64ToF16 : SchedRead; def ReadFCvtI64ToF32 : SchedRead; def ReadFCvtI64ToF64 : SchedRead; -def ReadFCvtI64ToF128 : SchedRead; def ReadFMovF16ToI16 : SchedRead; def ReadFMovI16ToF16 : SchedRead; def ReadFMovF32ToI32 : SchedRead; @@ -236,19 +204,12 @@ def ReadFCvtF16ToF32 : SchedRead; def ReadFCvtF32ToF16 : SchedRead; def ReadFCvtF16ToF64 : SchedRead; def ReadFCvtF64ToF16 : SchedRead; -def ReadFCvtF128ToF32 : SchedRead; -def ReadFCvtF128ToF64 : SchedRead; -def ReadFCvtF32ToF128 : SchedRead; -def ReadFCvtF64ToF128 : SchedRead; - def ReadFRoundF16 : SchedRead; def ReadFRoundF32 : SchedRead; def ReadFRoundF64 : SchedRead; - def ReadFClass16 : SchedRead; def ReadFClass32 : SchedRead; def ReadFClass64 : SchedRead; -def ReadFClass128 : SchedRead; // For CPUs that support Zfhmin, but not Zfh. 
multiclass UnsupportedSchedZfh { @@ -305,50 +266,7 @@ def : ReadAdvance; } // Unsupported = true } -multiclass UnsupportedSchedQ { -let Unsupported = true in { -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; - -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -} // Unsupported = true -} - -multiclass UnsupportedSchedD : UnsupportedSchedQ { +multiclass UnsupportedSchedD { let Unsupported = true in { def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 15dd4d57727dd..d11ce46bf78b5 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -37,7 +37,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h" #include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h" #include using namespace llvm; @@ -646,12 +645,6 @@ void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { OptimizationLevel Level) { LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated)); }); - - PB.registerVectorizerEndEPCallback( - [](FunctionPassManager &FPM, OptimizationLevel Level) { - if (Level.isOptimizingForSpeed()) - FPM.addPass(createFunctionToLoopPassAdaptor(EVLIndVarSimplifyPass())); - }); } yaml::MachineFunctionInfo * diff --git 
a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index b336732ec4b64..22fc1ca2c4c2d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -1081,19 +1081,15 @@ void SPIRVEmitIntrinsics::deduceOperandElementType( return; Value *Op0 = Ref->getOperand(0); Value *Op1 = Ref->getOperand(1); - bool Incomplete0 = isTodoType(Op0); - bool Incomplete1 = isTodoType(Op1); + Type *ElemTy0 = GR->findDeducedElementType(Op0); Type *ElemTy1 = GR->findDeducedElementType(Op1); - Type *ElemTy0 = (Incomplete0 && !Incomplete1 && ElemTy1) - ? nullptr - : GR->findDeducedElementType(Op0); if (ElemTy0) { KnownElemTy = ElemTy0; - Incomplete = Incomplete0; + Incomplete = isTodoType(Op0); Ops.push_back(std::make_pair(Op1, 1)); } else if (ElemTy1) { KnownElemTy = ElemTy1; - Incomplete = Incomplete1; + Incomplete = isTodoType(Op1); Ops.push_back(std::make_pair(Op0, 0)); } } else if (CallInst *CI = dyn_cast(I)) { @@ -1112,6 +1108,8 @@ void SPIRVEmitIntrinsics::deduceOperandElementType( IRBuilder<> B(Ctx); for (auto &OpIt : Ops) { Value *Op = OpIt.first; + if (Op->use_empty()) + continue; if (AskOps && !AskOps->contains(Op)) continue; Type *AskTy = nullptr; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9be3b39ce16fa..8c6c987c552cc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58403,7 +58403,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ((VT.is256BitVector() && (EltSizeInBits >= 32 || Subtarget.hasInt256())) || (VT.is512BitVector() && Subtarget.useAVX512Regs() && - (EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) { + (EltSizeInBits >= 32 || Subtarget.hasVBMI2())))) { + // TODO: Relax VBMI requirement for repeated shuffle ops - currently + // limited to targets that should always have good cross lane shuffles. 
SDValue Concat0 = CombineSubOperand(VT, Ops, 0); SDValue Concat1 = CombineSubOperand(VT, Ops, 1); if (Concat0 || Concat1 || diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index e76ddd4b648dc..ad7bdcd112459 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -925,9 +925,8 @@ void RISCVISAInfo::updateImpliedLengths() { assert(FLen == 0 && MaxELenFp == 0 && MaxELen == 0 && MinVLen == 0 && "Expected lengths to be initialied to zero"); - if (Exts.count("q")) - FLen = 128; - else if (Exts.count("d")) + // TODO: Handle q extension. + if (Exts.count("d")) FLen = 64; else if (Exts.count("f")) FLen = 32; diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp index 7ea7937d8b827..9cf4e448c9b6f 100644 --- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -91,12 +91,8 @@ PreservedAnalyses ForceFunctionAttrsPass::run(Module &M, bool Changed = false; if (!CSVFilePath.empty()) { auto BufferOrError = MemoryBuffer::getFileOrSTDIN(CSVFilePath); - if (!BufferOrError) { - std::error_code EC = BufferOrError.getError(); - M.getContext().emitError("cannot open CSV file: " + EC.message()); - return PreservedAnalyses::all(); - } - + if (!BufferOrError) + report_fatal_error("Cannot open CSV file."); StringRef Buffer = BufferOrError.get()->getBuffer(); auto MemoryBuffer = MemoryBuffer::getMemBuffer(Buffer); line_iterator It(*MemoryBuffer); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 616eeae3b1fec..4947a0da3bdb0 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -2151,8 +2151,8 @@ void SampleProfileLoader::removePseudoProbeInstsDiscriminator(Module &M) { std::optional DwarfDiscriminator = PseudoProbeDwarfDiscriminator::extractDwarfBaseDiscriminator( Discriminator); - 
I.setDebugLoc( - DIL->cloneWithDiscriminator(DwarfDiscriminator.value_or(0))); + I.setDebugLoc(DIL->cloneWithDiscriminator( + DwarfDiscriminator ? *DwarfDiscriminator : 0)); } } } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 24026e310ad11..a031d2e79c7f9 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5663,14 +5663,15 @@ static bool combineInstructionsOverFunction( // Iterate while there is work to do. unsigned Iteration = 0; while (true) { - if (Iteration >= Opts.MaxIterations && !VerifyFixpoint) { + ++Iteration; + + if (Iteration > Opts.MaxIterations && !VerifyFixpoint) { LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations << " on " << F.getName() << " reached; stopping without verifying fixpoint\n"); break; } - ++Iteration; ++NumWorklistIterations; LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index da5be383df15c..f2d6e268743eb 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1563,12 +1563,8 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info, static bool checkOrAndOpImpliedByOther( FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule, SmallVectorImpl &ReproducerCondStack, - SmallVectorImpl &DFSInStack, - SmallVectorImpl &ToRemove) { + SmallVectorImpl &DFSInStack) { Instruction *JoinOp = CB.getContextInst(); - if (JoinOp->use_empty()) - return false; - CmpInst *CmpToCheck = cast(CB.getInstructionToSimplify()); unsigned OtherOpIdx = JoinOp->getOperand(0) == CmpToCheck ? 
1 : 0; @@ -1615,12 +1611,15 @@ static bool checkOrAndOpImpliedByOther( if (auto ImpliedCondition = checkCondition(CmpToCheck->getPredicate(), CmpToCheck->getOperand(0), CmpToCheck->getOperand(1), CmpToCheck, Info)) { - if (IsOr == *ImpliedCondition) - JoinOp->replaceAllUsesWith( + if (IsOr && isa(JoinOp)) { + JoinOp->setOperand( + OtherOpIdx == 0 ? 2 : 0, ConstantInt::getBool(JoinOp->getType(), *ImpliedCondition)); - else - JoinOp->replaceAllUsesWith(JoinOp->getOperand(OtherOpIdx)); - ToRemove.push_back(JoinOp); + } else + JoinOp->setOperand( + 1 - OtherOpIdx, + ConstantInt::getBool(JoinOp->getType(), *ImpliedCondition)); + return true; } @@ -1853,9 +1852,9 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, ReproducerModule.get(), ReproducerCondStack, S.DT, ToRemove); if (!Simplified && match(CB.getContextInst(), m_LogicalOp(m_Value(), m_Value()))) { - Simplified = checkOrAndOpImpliedByOther( - CB, Info, ReproducerModule.get(), ReproducerCondStack, DFSInStack, - ToRemove); + Simplified = + checkOrAndOpImpliedByOther(CB, Info, ReproducerModule.get(), + ReproducerCondStack, DFSInStack); } Changed |= Simplified; } else if (auto *MinMax = dyn_cast(Inst)) { diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index d3771c0903456..0087d037f8cf2 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -1123,10 +1123,14 @@ static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI, static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV, Value *NewV) { IRBuilder<> B(MI); + MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa); + MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope); + MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias); + if (auto *MSI = dyn_cast(MI)) { B.CreateMemSet(NewV, MSI->getValue(), MSI->getLength(), MSI->getDestAlign(), false, // isVolatile - 
MI->getAAMetadata()); + TBAA, ScopeMD, NoAliasMD); } else if (auto *MTI = dyn_cast(MI)) { Value *Src = MTI->getRawSource(); Value *Dest = MTI->getRawDest(); @@ -1139,22 +1143,23 @@ static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV, Dest = NewV; if (auto *MCI = dyn_cast(MTI)) { + MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct); if (MCI->isForceInlined()) B.CreateMemCpyInline(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(), MTI->getLength(), false, // isVolatile - MI->getAAMetadata()); + TBAA, TBAAStruct, ScopeMD, NoAliasMD); else B.CreateMemCpy(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(), MTI->getLength(), false, // isVolatile - MI->getAAMetadata()); + TBAA, TBAAStruct, ScopeMD, NoAliasMD); } else { assert(isa(MTI)); B.CreateMemMove(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(), MTI->getLength(), false, // isVolatile - MI->getAAMetadata()); + TBAA, ScopeMD, NoAliasMD); } } else llvm_unreachable("unhandled MemIntrinsic"); diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index d6bd92d520e28..5bba3016ba4a1 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -790,8 +790,7 @@ struct LoopFuser { << " iterations of the first loop. 
\n"); ValueToValueMapTy VMap; - FC0.Peeled = - peelLoop(FC0.L, PeelCount, false, &LI, &SE, DT, &AC, true, VMap); + FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, DT, &AC, true, VMap); if (FC0.Peeled) { LLVM_DEBUG(dbgs() << "Done Peeling\n"); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f33c84c307ab8..8f5d1ecba982d 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1026,6 +1026,14 @@ bool LoopIdiomRecognize::processLoopStridedStore( SmallPtrSetImpl &Stores, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) { Module *M = TheStore->getModule(); + Value *SplatValue = isBytewiseValue(StoredVal, *DL); + Constant *PatternValue = nullptr; + + if (!SplatValue) + PatternValue = getMemSetPatternValue(StoredVal, DL); + + assert((SplatValue || PatternValue) && + "Expected either splat value or pattern value."); // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the @@ -1087,6 +1095,9 @@ bool LoopIdiomRecognize::processLoopStridedStore( Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator()); + if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) + return Changed; + AAMDNodes AATags = TheStore->getAAMetadata(); for (Instruction *Store : Stores) AATags = AATags.merge(Store->getAAMetadata()); @@ -1096,11 +1107,12 @@ bool LoopIdiomRecognize::processLoopStridedStore( AATags = AATags.extendTo(-1); CallInst *NewCall; - if (Value *SplatValue = isBytewiseValue(StoredVal, *DL)) { - NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, - MaybeAlign(StoreAlignment), - /*isVolatile=*/false, AATags); - } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) { + if (SplatValue) { + NewCall = Builder.CreateMemSet( + BasePtr, SplatValue, NumBytes, 
MaybeAlign(StoreAlignment), + /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); + } else { + assert (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)); // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; @@ -1111,18 +1123,23 @@ bool LoopIdiomRecognize::processLoopStridedStore( // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. - Constant *PatternValue = getMemSetPatternValue(StoredVal, DL); - assert(PatternValue && "Expected pattern value."); GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern"); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. GV->setAlignment(Align(16)); - NewCall = Builder.CreateCall(MSP, {BasePtr, GV, NumBytes}); - NewCall->setAAMetadata(AATags); - } else { - // Neither a memset, nor memset_pattern16 - return Changed; + Value *PatternPtr = GV; + NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); + + // Set the TBAA info if present. + if (AATags.TBAA) + NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA); + + if (AATags.Scope) + NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope); + + if (AATags.NoAlias) + NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias); } NewCall->setDebugLoc(TheStore->getDebugLoc()); @@ -1413,20 +1430,21 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // by previous checks. 
if (!IsAtomic) { if (UseMemMove) - NewCall = Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr, - LoadAlign, NumBytes, - /*isVolatile=*/false, AATags); + NewCall = Builder.CreateMemMove( + StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes, + /*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias); else NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, - NumBytes, /*isVolatile=*/false, AATags); + NumBytes, /*isVolatile=*/false, AATags.TBAA, + AATags.TBAAStruct, AATags.Scope, AATags.NoAlias); } else { // Create the call. // Note that unordered atomic loads/stores are *required* by the spec to // have an alignment but non-atomic loads/stores may not. NewCall = Builder.CreateElementUnorderedAtomicMemCpy( StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize, - AATags); + AATags.TBAA, AATags.TBAAStruct, AATags.Scope, AATags.NoAlias); } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 0b9fee5727c6f..d84b74dd0eecc 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1314,8 +1314,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, }); ValueToValueMapTy VMap; - if (peelLoop(L, PP.PeelCount, PP.PeelLast, LI, &SE, DT, &AC, PreserveLCSSA, - VMap)) { + if (peelLoop(L, PP.PeelCount, LI, &SE, DT, &AC, PreserveLCSSA, VMap)) { simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI, nullptr); // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. 
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 5487dbef8a434..0e0c012a9d676 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -81,28 +81,17 @@ MetadataPredicate createIdentityMDPredicate(const Function &F, return [](const Metadata *MD) { return false; }; DISubprogram *SPClonedWithinModule = F.getSubprogram(); - - // Don't clone inlined subprograms. - auto ShouldKeep = [SPClonedWithinModule](const DISubprogram *SP) -> bool { - return SP != SPClonedWithinModule; - }; - return [=](const Metadata *MD) { // Avoid cloning types, compile units, and (other) subprograms. if (isa(MD) || isa(MD)) return true; if (auto *SP = dyn_cast(MD)) - return ShouldKeep(SP); + return SP != SPClonedWithinModule; // If a subprogram isn't going to be cloned skip its lexical blocks as well. if (auto *LScope = dyn_cast(MD)) - return ShouldKeep(LScope->getSubprogram()); - - // Avoid cloning local variables of subprograms that won't be cloned. 
- if (auto *DV = dyn_cast(MD)) - if (auto *S = dyn_cast_or_null(DV->getScope())) - return ShouldKeep(S->getSubprogram()); + return LScope->getSubprogram() != SPClonedWithinModule; return false; }; diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index f15252b5f77e1..f6ace9c4e5d2f 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -49,7 +49,6 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-peel" STATISTIC(NumPeeled, "Number of loops peeled"); -STATISTIC(NumPeeledEnd, "Number of loops peeled from end"); static cl::opt UnrollPeelCount( "unroll-peel-count", cl::Hidden, @@ -326,71 +325,19 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L, return 0; } -bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) { - const SCEV *BTC = SE.getBackedgeTakenCount(&L); - Value *Inc; - CmpPredicate Pred; - BasicBlock *Succ1; - BasicBlock *Succ2; - // The loop must execute at least 2 iterations to guarantee that peeled - // iteration executes. - // TODO: Add checks during codegen. - if (isa(BTC) || - !SE.isKnownPredicate(CmpInst::ICMP_UGT, BTC, SE.getZero(BTC->getType()))) - return false; - - // Check if the exit condition of the loop can be adjusted by the peeling - // codegen. For now, it must - // * exit via the latch, - // * the exit condition must be a NE/EQ compare of an induction with step - // of 1. 
- BasicBlock *Latch = L.getLoopLatch(); - return Latch && Latch == L.getExitingBlock() && - match(Latch->getTerminator(), - m_Br(m_ICmp(Pred, m_Value(Inc), m_Value()), m_BasicBlock(Succ1), - m_BasicBlock(Succ2))) && - ((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader()) || - (Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader())) && - isa(SE.getSCEV(Inc)) && - cast(SE.getSCEV(Inc))->getStepRecurrence(SE)->isOne(); -} - -/// Returns true if the last iteration can be peeled off and the condition (Pred -/// LeftAR, RightSCEV) is known at the last iteration and the inverse condition -/// is known at the second-to-last. -static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred, - const SCEVAddRecExpr *LeftAR, - const SCEV *RightSCEV, - ScalarEvolution &SE) { - if (!canPeelLastIteration(L, SE)) - return false; - - const SCEV *BTC = SE.getBackedgeTakenCount(&L); - const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration(BTC, SE); - const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration( - SE.getMinusSCEV(BTC, SE.getOne(BTC->getType())), SE); - - return SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), ValAtLastIter, - RightSCEV) && - SE.isKnownPredicate(Pred, ValAtSecondToLastIter, RightSCEV); -} - -// Return the number of iterations to peel off from the beginning and end of the -// loop respectively, that make conditions in the body true/false. For example, -// if we peel 2 iterations off the loop below, the condition i < 2 can be -// evaluated at compile time. -// +// Return the number of iterations to peel off that make conditions in the +// body true/false. For example, if we peel 2 iterations off the loop below, +// the condition i < 2 can be evaluated at compile time. // for (i = 0; i < n; i++) // if (i < 2) // .. // else // .. 
// } -static std::pair -countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { +static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, + ScalarEvolution &SE) { assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; - unsigned DesiredPeelCountLast = 0; // Do not peel the entire loop. const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L); @@ -474,11 +421,8 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { const SCEV *Step = LeftAR->getStepRecurrence(SE); if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step, - Pred)) { - if (shouldPeelLastIteration(L, Pred, LeftAR, RightSCEV, SE)) - DesiredPeelCountLast = 1; + Pred)) return; - } // However, for equality comparisons, that isn't always sufficient to // eliminate the comparsion in loop body, we may need to peel one more @@ -495,7 +439,6 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { } DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount); - DesiredPeelCountLast = std::max(DesiredPeelCountLast, NewPeelCount); }; auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) { @@ -557,7 +500,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) { ComputePeelCount(BI->getCondition(), 0); } - return {DesiredPeelCount, DesiredPeelCountLast}; + return DesiredPeelCount; } /// This "heuristic" exactly matches implicit behavior which used to exist @@ -650,9 +593,8 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels); } - const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] = - countToEliminateCompares(*L, MaxPeelCount, SE); - DesiredPeelCount = std::max(DesiredPeelCount, CountToEliminateCmps); + DesiredPeelCount = std::max(DesiredPeelCount, + countToEliminateCompares(*L, MaxPeelCount, SE)); if (DesiredPeelCount == 0) DesiredPeelCount = 
peelToTurnInvariantLoadsDerefencebale(*L, DT, AC); @@ -667,23 +609,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, << " some Phis into invariants.\n"); PP.PeelCount = DesiredPeelCount; PP.PeelProfiledIterations = false; - PP.PeelLast = false; - return; - } - } - - if (CountToEliminateCmpsLast > 0) { - unsigned DesiredPeelCountLast = - std::min(CountToEliminateCmpsLast, MaxPeelCount); - // Consider max peel count limitation. - assert(DesiredPeelCountLast > 0 && "Wrong loop size estimation?"); - if (DesiredPeelCountLast + AlreadyPeeled <= UnrollPeelMaxCount) { - LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount - << " iteration(s) to turn" - << " some Phis into invariants.\n"); - PP.PeelCount = DesiredPeelCountLast; - PP.PeelProfiledIterations = false; - PP.PeelLast = true; return; } } @@ -808,7 +733,6 @@ static void initBranchWeights(DenseMap &WeightInfos, /// InsertBot. /// \param IterNumber The serial number of the iteration currently being /// peeled off. -/// \param PeelLast Peel off the last iterations from \p L. /// \param ExitEdges The exit edges of the original loop. /// \param[out] NewBlocks A list of the blocks in the newly created clone /// \param[out] VMap The value map between the loop and the new clone. @@ -816,8 +740,7 @@ static void initBranchWeights(DenseMap &WeightInfos, /// \param LVMap A value-map that maps instructions from the original loop to /// instructions in the last peeled-off iteration. static void cloneLoopBlocks( - Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop, - BasicBlock *InsertBot, + Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot, SmallVectorImpl> &ExitEdges, SmallVectorImpl &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT, @@ -881,26 +804,16 @@ static void cloneLoopBlocks( // Similarly, for the latch: // The original exiting edge is still hooked up to the loop exit. 
+ // The backedge now goes to the "bottom", which is either the loop's real + // header (for the last peeled iteration) or the copied header of the next + // iteration (for every other iteration) BasicBlock *NewLatch = cast(VMap[Latch]); - if (PeelLast) { - // This is the last iteration and we definitely will go to the exit. Just - // set both successors to InsertBot and let the branch be simplified later. - assert(IterNumber == 0 && "Only peeling a single iteration implemented."); - auto *LatchTerm = cast(NewLatch->getTerminator()); - LatchTerm->setSuccessor(0, InsertBot); - LatchTerm->setSuccessor(1, InsertBot); - } else { - auto *LatchTerm = cast(NewLatch->getTerminator()); - // The backedge now goes to the "bottom", which is either the loop's real - // header (for the last peeled iteration) or the copied header of the next - // iteration (for every other iteration) - for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) { - if (LatchTerm->getSuccessor(idx) == Header) { - LatchTerm->setSuccessor(idx, InsertBot); - break; - } + auto *LatchTerm = cast(NewLatch->getTerminator()); + for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) + if (LatchTerm->getSuccessor(idx) == Header) { + LatchTerm->setSuccessor(idx, InsertBot); + break; } - } if (DT) DT->changeImmediateDominator(InsertBot, NewLatch); @@ -908,33 +821,23 @@ static void cloneLoopBlocks( // that pick an incoming value from either the preheader, or the previous // loop iteration. Since this copy is no longer part of the loop, we // resolve this statically: - if (PeelLast) { - // For the last iteration, we use the value from the latch of the original - // loop directly. - for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { - PHINode *NewPHI = cast(VMap[&*I]); - VMap[&*I] = NewPHI->getIncomingValueForBlock(Latch); - NewPHI->eraseFromParent(); - } - } else { - // For the first iteration, we use the value from the preheader directly. 
- // For any other iteration, we replace the phi with the value generated by - // the immediately preceding clone of the loop body (which represents - // the previous iteration). - for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { - PHINode *NewPHI = cast(VMap[&*I]); - if (IterNumber == 0) { - VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); - } else { - Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast(LatchVal); - if (LatchInst && L->contains(LatchInst)) - VMap[&*I] = LVMap[LatchInst]; - else - VMap[&*I] = LatchVal; - } - NewPHI->eraseFromParent(); + // For the first iteration, we use the value from the preheader directly. + // For any other iteration, we replace the phi with the value generated by + // the immediately preceding clone of the loop body (which represents + // the previous iteration). + for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { + PHINode *NewPHI = cast(VMap[&*I]); + if (IterNumber == 0) { + VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader); + } else { + Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast(LatchVal); + if (LatchInst && L->contains(LatchInst)) + VMap[&*I] = LVMap[LatchInst]; + else + VMap[&*I] = LatchVal; } + NewPHI->eraseFromParent(); } // Fix up the outgoing values - we need to add a value for the iteration @@ -1002,14 +905,11 @@ llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, /// this provides a benefit, since the peeled off iterations, which account /// for the bulk of dynamic execution, can be further simplified by scalar /// optimizations. 
-bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, +bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree &DT, AssumptionCache *AC, bool PreserveLCSSA, ValueToValueMapTy &LVMap) { assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); assert(canPeel(L) && "Attempt to peel a loop which is not peelable?"); - assert((!PeelLast || (canPeelLastIteration(*L, *SE) && PeelCount == 1)) && - "when peeling the last iteration, the loop must be supported and can " - "only peel a single iteration"); LoopBlocksDFS LoopBlocks(L); LoopBlocks.perform(LI); @@ -1044,99 +944,60 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, Function *F = Header->getParent(); - // Set up all the necessary basic blocks. - BasicBlock *InsertTop; - BasicBlock *InsertBot; - BasicBlock *NewPreHeader; - DenseMap ExitValues; - if (PeelLast) { - // It is convenient to split the single exit block from the latch the - // into 3 parts - two blocks to anchor the peeled copy of the loop body, - // and a new final exit block. - - // Peeling the last iteration transforms. - // - // PreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // into - // - // Header: - // LoopBody - // If (cond) goto Header - // InsertTop: - // LoopBody - // If (!cond) goto InsertBot - // InsertBot: - // Exit: - // ... - BasicBlock *Exit = L->getExitBlock(); - for (PHINode &P : Exit->phis()) - ExitValues[&P] = P.getIncomingValueForBlock(Latch); - - InsertTop = SplitEdge(Latch, Exit, &DT, LI); - InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); - - InsertTop->setName(Exit->getName() + ".peel.begin"); - InsertBot->setName(Exit->getName() + ".peel.next"); - } else { - // It is convenient to split the preheader into 3 parts - two blocks to - // anchor the peeled copy of the loop body, and a new preheader for the - // "real" loop. 
- - // Peeling the first iteration transforms. - // - // PreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // into - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // NewPreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - // Each following iteration will split the current bottom anchor in two, - // and put the new copy of the loop body between these two blocks. That - // is, after peeling another iteration from the example above, we'll - // split InsertBot, and get: - // - // InsertTop: - // LoopBody - // If (!cond) goto Exit - // InsertBot: - // LoopBody - // If (!cond) goto Exit - // InsertBot.next: - // NewPreHeader: - // ... - // Header: - // LoopBody - // If (cond) goto Header - // Exit: - // - InsertTop = SplitEdge(PreHeader, Header, &DT, LI); - InsertBot = SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); - NewPreHeader = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); - - InsertTop->setName(Header->getName() + ".peel.begin"); - InsertBot->setName(Header->getName() + ".peel.next"); - NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); - } + // Set up all the necessary basic blocks. It is convenient to split the + // preheader into 3 parts - two blocks to anchor the peeled copy of the loop + // body, and a new preheader for the "real" loop. + + // Peeling the first iteration transforms. + // + // PreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // into + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + // + // Each following iteration will split the current bottom anchor in two, + // and put the new copy of the loop body between these two blocks. 
That is, + // after peeling another iteration from the example above, we'll split + // InsertBot, and get: + // + // InsertTop: + // LoopBody + // If (!cond) goto Exit + // InsertBot: + // LoopBody + // If (!cond) goto Exit + // InsertBot.next: + // NewPreHeader: + // ... + // Header: + // LoopBody + // If (cond) goto Header + // Exit: + + BasicBlock *InsertTop = SplitEdge(PreHeader, Header, &DT, LI); + BasicBlock *InsertBot = + SplitBlock(InsertTop, InsertTop->getTerminator(), &DT, LI); + BasicBlock *NewPreHeader = + SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); + + InsertTop->setName(Header->getName() + ".peel.begin"); + InsertBot->setName(Header->getName() + ".peel.next"); + NewPreHeader->setName(PreHeader->getName() + ".peel.newph"); Instruction *LatchTerm = cast(cast(Latch)->getTerminator()); @@ -1152,40 +1013,23 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); // For each peeled-off iteration, make a copy of the loop. - ValueToValueMapTy VMap; for (unsigned Iter = 0; Iter < PeelCount; ++Iter) { SmallVector NewBlocks; + ValueToValueMapTy VMap; - cloneLoopBlocks(L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges, - NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI, + cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks, + LoopBlocks, VMap, LVMap, &DT, LI, LoopLocalNoAliasDeclScopes, *SE); // Remap to use values from the current iteration instead of the // previous one. remapInstructionsInBlocks(NewBlocks, VMap); - if (Iter == 0) { - if (PeelLast) { - // Adjust the exit condition so the loop exits one iteration early. - // For now we simply subtract one form the second operand of the - // exit condition. This relies on the peel count computation to - // check that this is actually legal. In particular, it ensures that - // the first operand of the compare is an AddRec with step 1 and we - // execute more than one iteration. 
- auto *Cmp = - cast(L->getLoopLatch()->getTerminator()->getOperand(0)); - IRBuilder B(Cmp); - Cmp->setOperand( - 1, B.CreateSub(Cmp->getOperand(1), - ConstantInt::get(Cmp->getOperand(1)->getType(), 1))); - } else { - // Update IDoms of the blocks reachable through exits. - for (auto BBIDom : NonLoopBlocksIDom) - DT.changeImmediateDominator(BBIDom.first, - cast(LVMap[BBIDom.second])); - } - } - + // Update IDoms of the blocks reachable through exits. + if (Iter == 0) + for (auto BBIDom : NonLoopBlocksIDom) + DT.changeImmediateDominator(BBIDom.first, + cast(LVMap[BBIDom.second])); #ifdef EXPENSIVE_CHECKS assert(DT.verify(DominatorTree::VerificationLevel::Fast)); #endif @@ -1208,24 +1052,16 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, F->end()); } - if (PeelLast) { - // Now adjust users of the original exit values by replacing them with the - // exit value from the peeled iteration. - for (const auto &[P, E] : ExitValues) - P->replaceAllUsesWith(VMap.lookup(E)); - formLCSSA(*L, DT, LI, SE); - } else { - // Now adjust the phi nodes in the loop header to get their initial values - // from the last peeled-off iteration instead of the preheader. - for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { - PHINode *PHI = cast(I); - Value *NewVal = PHI->getIncomingValueForBlock(Latch); - Instruction *LatchInst = dyn_cast(NewVal); - if (LatchInst && L->contains(LatchInst)) - NewVal = LVMap[LatchInst]; + // Now adjust the phi nodes in the loop header to get their initial values + // from the last peeled-off iteration instead of the preheader. 
+ for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { + PHINode *PHI = cast(I); + Value *NewVal = PHI->getIncomingValueForBlock(Latch); + Instruction *LatchInst = dyn_cast(NewVal); + if (LatchInst && L->contains(LatchInst)) + NewVal = LVMap[LatchInst]; - PHI->setIncomingValueForBlock(NewPreHeader, NewVal); - } + PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } for (const auto &[Term, Info] : Weights) { @@ -1254,7 +1090,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI, simplifyLoop(L, &DT, LI, SE, AC, nullptr, PreserveLCSSA); NumPeeled++; - NumPeeledEnd += PeelLast; return true; } diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp index 511c15555fa83..de84a76ede7ff 100644 --- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp +++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp @@ -290,9 +290,8 @@ Value *getAndroidSlotPtr(IRBuilder<> &IRB, int Slot) { Module *M = IRB.GetInsertBlock()->getParent()->getParent(); // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER // in Bionic's libc/private/bionic_tls.h. 
- Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration( - M, Intrinsic::thread_pointer, - IRB.getPtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace())); + Function *ThreadPointerFunc = + Intrinsic::getOrInsertDeclaration(M, Intrinsic::thread_pointer); return IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc), 8 * Slot); } diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 2b5488b2e8126..0dc6a7d2f594f 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_component_library(LLVMVectorize - EVLIndVarSimplify.cpp LoadStoreVectorizer.cpp LoopIdiomVectorize.cpp LoopVectorizationLegality.cpp diff --git a/llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp b/llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp deleted file mode 100644 index 4a1fb095bae35..0000000000000 --- a/llvm/lib/Transforms/Vectorize/EVLIndVarSimplify.cpp +++ /dev/null @@ -1,301 +0,0 @@ -//===---- EVLIndVarSimplify.cpp - Optimize vectorized loops w/ EVL IV------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass optimizes a vectorized loop with canonical IV to using EVL-based -// IV if it was tail-folded by predicated EVL. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Vectorize/EVLIndVarSimplify.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/IVDescriptors.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" - -#define DEBUG_TYPE "evl-iv-simplify" - -using namespace llvm; - -STATISTIC(NumEliminatedCanonicalIV, "Number of canonical IVs we eliminated"); - -static cl::opt EnableEVLIndVarSimplify( - "enable-evl-indvar-simplify", - cl::desc("Enable EVL-based induction variable simplify Pass"), cl::Hidden, - cl::init(true)); - -namespace { -struct EVLIndVarSimplifyImpl { - ScalarEvolution &SE; - OptimizationRemarkEmitter *ORE = nullptr; - - EVLIndVarSimplifyImpl(LoopStandardAnalysisResults &LAR, - OptimizationRemarkEmitter *ORE) - : SE(LAR.SE), ORE(ORE) {} - - /// Returns true if modify the loop. - bool run(Loop &L); -}; -} // anonymous namespace - -/// Returns the constant part of vectorization factor from the induction -/// variable's step value SCEV expression. -static uint32_t getVFFromIndVar(const SCEV *Step, const Function &F) { - if (!Step) - return 0U; - - // Looking for loops with IV step value in the form of `( x - // vscale)`. 
- if (const auto *Mul = dyn_cast(Step)) { - if (Mul->getNumOperands() == 2) { - const SCEV *LHS = Mul->getOperand(0); - const SCEV *RHS = Mul->getOperand(1); - if (const auto *Const = dyn_cast(LHS); - Const && isa(RHS)) { - uint64_t V = Const->getAPInt().getLimitedValue(); - if (llvm::isUInt<32>(V)) - return V; - } - } - } - - // If not, see if the vscale_range of the parent function is a fixed value, - // which makes the step value to be replaced by a constant. - if (F.hasFnAttribute(Attribute::VScaleRange)) - if (const auto *ConstStep = dyn_cast(Step)) { - APInt V = ConstStep->getAPInt().abs(); - ConstantRange CR = llvm::getVScaleRange(&F, 64); - if (const APInt *Fixed = CR.getSingleElement()) { - V = V.zextOrTrunc(Fixed->getBitWidth()); - uint64_t VF = V.udiv(*Fixed).getLimitedValue(); - if (VF && llvm::isUInt<32>(VF) && - // Make sure step is divisible by vscale. - V.urem(*Fixed).isZero()) - return VF; - } - } - - return 0U; -} - -bool EVLIndVarSimplifyImpl::run(Loop &L) { - if (!EnableEVLIndVarSimplify) - return false; - - if (!getBooleanLoopAttribute(&L, "llvm.loop.isvectorized")) - return false; - const MDOperand *EVLMD = - findStringMetadataForLoop(&L, "llvm.loop.isvectorized.tailfoldingstyle") - .value_or(nullptr); - if (!EVLMD || !EVLMD->equalsStr("evl")) - return false; - - BasicBlock *LatchBlock = L.getLoopLatch(); - ICmpInst *OrigLatchCmp = L.getLatchCmpInst(); - if (!LatchBlock || !OrigLatchCmp) - return false; - - InductionDescriptor IVD; - PHINode *IndVar = L.getInductionVariable(SE); - if (!IndVar || !L.getInductionDescriptor(SE, IVD)) { - const char *Reason = (IndVar ? 
"induction descriptor is not available" - : "cannot recognize induction variable"); - LLVM_DEBUG(dbgs() << "Cannot retrieve IV from loop " << L.getName() - << " because" << Reason << "\n"); - if (ORE) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedIndVar", - L.getStartLoc(), L.getHeader()) - << "Cannot retrieve IV because " << ore::NV("Reason", Reason); - }); - } - return false; - } - - BasicBlock *InitBlock, *BackEdgeBlock; - if (!L.getIncomingAndBackEdge(InitBlock, BackEdgeBlock)) { - LLVM_DEBUG(dbgs() << "Expect unique incoming and backedge in " - << L.getName() << "\n"); - if (ORE) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedLoopStructure", - L.getStartLoc(), L.getHeader()) - << "Does not have a unique incoming and backedge"; - }); - } - return false; - } - - // Retrieve the loop bounds. - std::optional Bounds = L.getBounds(SE); - if (!Bounds) { - LLVM_DEBUG(dbgs() << "Could not obtain the bounds for loop " << L.getName() - << "\n"); - if (ORE) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedLoopStructure", - L.getStartLoc(), L.getHeader()) - << "Could not obtain the loop bounds"; - }); - } - return false; - } - Value *CanonicalIVInit = &Bounds->getInitialIVValue(); - Value *CanonicalIVFinal = &Bounds->getFinalIVValue(); - - const SCEV *StepV = IVD.getStep(); - uint32_t VF = getVFFromIndVar(StepV, *L.getHeader()->getParent()); - if (!VF) { - LLVM_DEBUG(dbgs() << "Could not infer VF from IndVar step '" << *StepV - << "'\n"); - if (ORE) { - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnrecognizedIndVar", - L.getStartLoc(), L.getHeader()) - << "Could not infer VF from IndVar step " - << ore::NV("Step", StepV); - }); - } - return false; - } - LLVM_DEBUG(dbgs() << "Using VF=" << VF << " for loop " << L.getName() - << "\n"); - - // Try to find the EVL-based induction variable. 
- using namespace PatternMatch; - BasicBlock *BB = IndVar->getParent(); - - Value *EVLIndVar = nullptr; - Value *RemTC = nullptr; - Value *TC = nullptr; - auto IntrinsicMatch = m_Intrinsic( - m_Value(RemTC), m_SpecificInt(VF), - /*Scalable=*/m_SpecificInt(1)); - for (PHINode &PN : BB->phis()) { - if (&PN == IndVar) - continue; - - // Check 1: it has to contain both incoming (init) & backedge blocks - // from IndVar. - if (PN.getBasicBlockIndex(InitBlock) < 0 || - PN.getBasicBlockIndex(BackEdgeBlock) < 0) - continue; - // Check 2: EVL index is always increasing, thus its inital value has to be - // equal to either the initial IV value (when the canonical IV is also - // increasing) or the last IV value (when canonical IV is decreasing). - Value *Init = PN.getIncomingValueForBlock(InitBlock); - using Direction = Loop::LoopBounds::Direction; - switch (Bounds->getDirection()) { - case Direction::Increasing: - if (Init != CanonicalIVInit) - continue; - break; - case Direction::Decreasing: - if (Init != CanonicalIVFinal) - continue; - break; - case Direction::Unknown: - // To be more permissive and see if either the initial or final IV value - // matches PN's init value. - if (Init != CanonicalIVInit && Init != CanonicalIVFinal) - continue; - break; - } - Value *RecValue = PN.getIncomingValueForBlock(BackEdgeBlock); - assert(RecValue && "expect recurrent IndVar value"); - - LLVM_DEBUG(dbgs() << "Found candidate PN of EVL-based IndVar: " << PN - << "\n"); - - // Check 3: Pattern match to find the EVL-based index and total trip count - // (TC). 
- if (match(RecValue, - m_c_Add(m_ZExtOrSelf(IntrinsicMatch), m_Specific(&PN))) && - match(RemTC, m_Sub(m_Value(TC), m_Specific(&PN)))) { - EVLIndVar = RecValue; - break; - } - } - - if (!EVLIndVar || !TC) - return false; - - LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n"); - if (ORE) { - ORE->emit([&]() { - DebugLoc DL; - BasicBlock *Region = nullptr; - if (auto *I = dyn_cast(EVLIndVar)) { - DL = I->getDebugLoc(); - Region = I->getParent(); - } else { - DL = L.getStartLoc(); - Region = L.getHeader(); - } - return OptimizationRemark(DEBUG_TYPE, "UseEVLIndVar", DL, Region) - << "Using " << ore::NV("EVLIndVar", EVLIndVar) - << " for EVL-based IndVar"; - }); - } - - // Create an EVL-based comparison and replace the branch to use it as - // predicate. - - // Loop::getLatchCmpInst check at the beginning of this function has ensured - // that latch block ends in a conditional branch. - auto *LatchBranch = cast(LatchBlock->getTerminator()); - assert(LatchBranch->isConditional() && - "expect the loop latch to be ended with a conditional branch"); - ICmpInst::Predicate Pred; - if (LatchBranch->getSuccessor(0) == L.getHeader()) - Pred = ICmpInst::ICMP_NE; - else - Pred = ICmpInst::ICMP_EQ; - - IRBuilder<> Builder(OrigLatchCmp); - auto *NewLatchCmp = Builder.CreateICmp(Pred, EVLIndVar, TC); - OrigLatchCmp->replaceAllUsesWith(NewLatchCmp); - - // llvm::RecursivelyDeleteDeadPHINode only deletes cycles whose values are - // not used outside the cycles. However, in this case the now-RAUW-ed - // OrigLatchCmp will be considered a use outside the cycle while in reality - // it's practically dead. Thus we need to remove it before calling - // RecursivelyDeleteDeadPHINode. 
- (void)RecursivelyDeleteTriviallyDeadInstructions(OrigLatchCmp); - if (llvm::RecursivelyDeleteDeadPHINode(IndVar)) - LLVM_DEBUG(dbgs() << "Removed original IndVar\n"); - - ++NumEliminatedCanonicalIV; - - return true; -} - -PreservedAnalyses EVLIndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &LAM, - LoopStandardAnalysisResults &AR, - LPMUpdater &U) { - Function &F = *L.getHeader()->getParent(); - auto &FAMProxy = LAM.getResult(L, AR); - OptimizationRemarkEmitter *ORE = - FAMProxy.getCachedResult(F); - - if (EVLIndVarSimplifyImpl(AR, ORE).run(L)) - return PreservedAnalyses::allInSet(); - return PreservedAnalyses::all(); -} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7ad02956a5b69..1611c6d3a4437 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -603,13 +603,13 @@ class InnerLoopVectorizer { // --- Vectorization state --- /// The vector-loop preheader. - BasicBlock *LoopVectorPreHeader = nullptr; + BasicBlock *LoopVectorPreHeader; /// The scalar-loop preheader. - BasicBlock *LoopScalarPreHeader = nullptr; + BasicBlock *LoopScalarPreHeader; /// Middle Block between the vector and the scalar. - BasicBlock *LoopMiddleBlock = nullptr; + BasicBlock *LoopMiddleBlock; /// A list of all bypass blocks. The first block is the entry of the loop. 
SmallVector LoopBypassBlocks; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index eb339282fdae8..45cf4e1eac092 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1584,7 +1584,7 @@ static void addMask(SmallVectorImpl &Mask, ArrayRef SubMask, /// before: 6 9 5 4 9 2 1 0 /// after: 6 3 5 4 7 2 1 0 static void fixupOrderingIndices(MutableArrayRef Order) { - const size_t Sz = Order.size(); + const unsigned Sz = Order.size(); SmallBitVector UnusedIndices(Sz, /*t=*/true); SmallBitVector MaskedIndices(Sz); for (unsigned I = 0; I < Sz; ++I) { @@ -2216,7 +2216,7 @@ class BoUpSLP { !LI2->isSimple()) return CheckSameEntryOrFail(); - std::optional Dist = getPointersDiff( + std::optional Dist = getPointersDiff( LI1->getType(), LI1->getPointerOperand(), LI2->getType(), LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true); if (!Dist || *Dist == 0) { @@ -3619,10 +3619,9 @@ class BoUpSLP { /// vector loads/masked gathers instead of regular gathers. Later these loads /// are reshufled to build final gathered nodes. void tryToVectorizeGatheredLoads( - const SmallMapVector< - std::tuple, - SmallVector>>, 8> - &GatheredLoads); + const SmallMapVector, + SmallVector>>, + 8> &GatheredLoads); /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the /// users of \p TE and collects the stores. It returns the map from the store @@ -4316,7 +4315,7 @@ class BoUpSLP { /// bundle being the last instruction in the program order during /// vectorization process since the basic blocks are affected, need to /// pre-gather them before. 
- SmallDenseMap EntryToLastInstruction; + DenseMap EntryToLastInstruction; /// List of gather nodes, depending on other gather/vector nodes, which should /// be emitted after the vector instruction emission process to correctly @@ -5369,7 +5368,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE, // patterns. SmallVector GatheredScalars(TE.Scalars.begin(), TE.Scalars.end()); Type *ScalarTy = GatheredScalars.front()->getType(); - size_t NumScalars = GatheredScalars.size(); + int NumScalars = GatheredScalars.size(); if (!isValidElementType(ScalarTy)) return std::nullopt; auto *VecTy = getWidenedType(ScalarTy, NumScalars); @@ -5443,7 +5442,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE, unsigned Limit = getNumElems(CurrentOrder.size(), PartSz, I); MutableArrayRef Slice = CurrentOrder.slice(I * PartSz, Limit); // Shuffle of at least 2 vectors - ignore. - if (any_of(Slice, [&](unsigned I) { return I != NumScalars; })) { + if (any_of(Slice, [&](int I) { return I != NumScalars; })) { std::fill(Slice.begin(), Slice.end(), NumScalars); ShuffledSubMasks.set(I); continue; @@ -5541,8 +5540,8 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE, return std::max(Entries[I].front()->getVectorFactor(), Entries[I].back()->getVectorFactor()); }); - unsigned NumUndefs = - count_if(CurrentOrder, [&](unsigned Idx) { return Idx == NumScalars; }); + int NumUndefs = + count_if(CurrentOrder, [&](int Idx) { return Idx == NumScalars; }); if (ShuffledSubMasks.all() || (NumScalars > 2 && NumUndefs >= NumScalars / 2)) return std::nullopt; return std::move(CurrentOrder); @@ -5869,11 +5868,7 @@ static bool buildCompressMask(ArrayRef PointerOps, Value *Ptr0 = Order.empty() ? PointerOps.front() : PointerOps[Order.front()]; for (unsigned I : seq(1, Sz)) { Value *Ptr = Order.empty() ? 
PointerOps[I] : PointerOps[Order[I]]; - std::optional OptPos = - getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE); - if (!OptPos || OptPos > std::numeric_limits::max()) - return false; - unsigned Pos = static_cast(*OptPos); + unsigned Pos = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE); CompressMask[I] = Pos; if (!Stride) continue; @@ -5899,7 +5894,7 @@ static bool isMaskedLoadCompress( VectorType *&LoadVecTy) { InterleaveFactor = 0; Type *ScalarTy = VL.front()->getType(); - const size_t Sz = VL.size(); + const unsigned Sz = VL.size(); auto *VecTy = getWidenedType(ScalarTy, Sz); constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; SmallVector Mask; @@ -5926,11 +5921,11 @@ static bool isMaskedLoadCompress( Ptr0 = PointerOps[Order.front()]; PtrN = PointerOps[Order.back()]; } - std::optional Diff = + std::optional Diff = getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE); if (!Diff) return false; - const size_t MaxRegSize = + const unsigned MaxRegSize = TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) .getFixedValue(); // Check for very large distances between elements. 
@@ -6056,10 +6051,9 @@ static bool isStridedLoad(ArrayRef VL, ArrayRef PointerOps, ArrayRef Order, const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE, - const bool IsAnyPointerUsedOutGraph, - const int64_t Diff) { - const size_t Sz = VL.size(); - const uint64_t AbsoluteDiff = std::abs(Diff); + const bool IsAnyPointerUsedOutGraph, const int Diff) { + const unsigned Sz = VL.size(); + const unsigned AbsoluteDiff = std::abs(Diff); Type *ScalarTy = VL.front()->getType(); auto *VecTy = getWidenedType(ScalarTy, Sz); if (IsAnyPointerUsedOutGraph || @@ -6067,9 +6061,9 @@ static bool isStridedLoad(ArrayRef VL, ArrayRef PointerOps, (Sz > MinProfitableStridedLoads || (AbsoluteDiff <= MaxProfitableLoadStride * Sz && AbsoluteDiff % Sz == 0 && has_single_bit(AbsoluteDiff / Sz)))) || - Diff == -(static_cast(Sz) - 1)) { - int64_t Stride = Diff / static_cast(Sz - 1); - if (Diff != Stride * static_cast(Sz - 1)) + Diff == -(static_cast(Sz) - 1)) { + int Stride = Diff / static_cast(Sz - 1); + if (Diff != Stride * static_cast(Sz - 1)) return false; Align Alignment = cast(Order.empty() ? VL.front() : VL[Order.front()]) @@ -6087,9 +6081,9 @@ static bool isStridedLoad(ArrayRef VL, ArrayRef PointerOps, } // Iterate through all pointers and check if all distances are // unique multiple of Dist. - SmallSet Dists; + SmallSet Dists; for (Value *Ptr : PointerOps) { - int64_t Dist = 0; + int Dist = 0; if (Ptr == PtrN) Dist = Diff; else if (Ptr != Ptr0) @@ -6128,7 +6122,7 @@ BoUpSLP::canVectorizeLoads(ArrayRef VL, const Value *VL0, // Make sure all loads in the bundle are simple - we can't vectorize // atomic or volatile loads. 
PointerOps.clear(); - const size_t Sz = VL.size(); + const unsigned Sz = VL.size(); PointerOps.resize(Sz); auto *POIter = PointerOps.begin(); for (Value *V : VL) { @@ -6171,10 +6165,10 @@ BoUpSLP::canVectorizeLoads(ArrayRef VL, const Value *VL0, Ptr0 = PointerOps[Order.front()]; PtrN = PointerOps[Order.back()]; } - std::optional Diff = + std::optional Diff = getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE); // Check that the sorted loads are consecutive. - if (static_cast(*Diff) == Sz - 1) + if (static_cast(*Diff) == Sz - 1) return LoadsState::Vectorize; if (isMaskedLoadCompress(VL, PointerOps, Order, *TTI, *DL, *SE, *AC, *DT, *TLI, [&](Value *V) { @@ -6433,9 +6427,8 @@ static bool clusterSortPtrAccesses(ArrayRef VL, // Map from bases to a vector of (Ptr, Offset, OrigIdx), which we insert each // Ptr into, sort and return the sorted indices with values next to one // another. - SmallMapVector< - std::pair, - SmallVector>>, 8> + SmallMapVector, + SmallVector>>, 8> Bases; Bases .try_emplace(std::make_pair( @@ -6448,10 +6441,10 @@ static bool clusterSortPtrAccesses(ArrayRef VL, getUnderlyingObject(Ptr, RecursionMaxDepth)); bool Found = any_of(Bases.try_emplace(Key).first->second, [&, &Cnt = Cnt, &Ptr = Ptr](auto &Base) { - std::optional Diff = - getPointersDiff(ElemTy, std::get<0>(Base.front()), - ElemTy, Ptr, DL, SE, - /*StrictCheck=*/true); + std::optional Diff = getPointersDiff( + ElemTy, std::get<0>(Base.front()), ElemTy, + Ptr, DL, SE, + /*StrictCheck=*/true); if (!Diff) return false; @@ -6501,11 +6494,10 @@ static bool clusterSortPtrAccesses(ArrayRef VL, for (auto &Vec : Base.second) { if (Vec.size() > 1) { stable_sort(Vec, llvm::less_second()); - int64_t InitialOffset = std::get<1>(Vec[0]); + int InitialOffset = std::get<1>(Vec[0]); bool AnyConsecutive = all_of(enumerate(Vec), [InitialOffset](const auto &P) { - return std::get<1>(P.value()) == - int64_t(P.index()) + InitialOffset; + return std::get<1>(P.value()) == int(P.index()) + InitialOffset; }); 
// Fill SortedIndices array only if it looks worth-while to sort the // ptrs. @@ -7015,7 +7007,7 @@ static void combineOrders(MutableArrayRef Order, ArrayRef SecondaryOrder) { assert((SecondaryOrder.empty() || Order.size() == SecondaryOrder.size()) && "Expected same size of orders"); - size_t Sz = Order.size(); + unsigned Sz = Order.size(); SmallBitVector UsedIndices(Sz); for (unsigned Idx : seq(0, Sz)) { if (Order[Idx] != Sz) @@ -8007,7 +7999,7 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const { if (StoresVec.size() > Lane) continue; if (!StoresVec.empty()) { - std::optional Diff = getPointersDiff( + std::optional Diff = getPointersDiff( SI->getValueOperand()->getType(), SI->getPointerOperand(), SI->getValueOperand()->getType(), StoresVec.front()->getPointerOperand(), *DL, *SE, @@ -8035,14 +8027,14 @@ bool BoUpSLP::canFormVector(ArrayRef StoresVec, // To avoid calling getPointersDiff() while sorting we create a vector of // pairs {store, offset from first} and sort this instead. 
- SmallVector> StoreOffsetVec; + SmallVector> StoreOffsetVec; StoreInst *S0 = StoresVec[0]; StoreOffsetVec.emplace_back(0, 0); Type *S0Ty = S0->getValueOperand()->getType(); Value *S0Ptr = S0->getPointerOperand(); for (unsigned Idx : seq(1, StoresVec.size())) { StoreInst *SI = StoresVec[Idx]; - std::optional Diff = + std::optional Diff = getPointersDiff(S0Ty, S0Ptr, SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true); @@ -8054,7 +8046,7 @@ bool BoUpSLP::canFormVector(ArrayRef StoresVec, return false; sort(StoreOffsetVec, llvm::less_first()); unsigned Idx = 0; - int64_t PrevDist = 0; + int PrevDist = 0; for (const auto &P : StoreOffsetVec) { if (Idx > 0 && P.first != PrevDist + 1) return false; @@ -8138,15 +8130,15 @@ void BoUpSLP::buildTree(ArrayRef Roots) { static void gatherPossiblyVectorizableLoads( const BoUpSLP &R, ArrayRef VL, const DataLayout &DL, ScalarEvolution &SE, const TargetTransformInfo &TTI, - SmallVectorImpl>> &GatheredLoads, + SmallVectorImpl>> &GatheredLoads, bool AddNew = true) { if (VL.empty()) return; Type *ScalarTy = getValueType(VL.front()); if (!isValidElementType(ScalarTy)) return; - SmallVector>> ClusteredLoads; - SmallVector> ClusteredDistToLoad; + SmallVector>> ClusteredLoads; + SmallVector> ClusteredDistToLoad; for (Value *V : VL) { auto *LI = dyn_cast(V); if (!LI) @@ -8162,7 +8154,7 @@ static void gatherPossiblyVectorizableLoads( RecursionMaxDepth) && "Expected loads with the same type, same parent and same " "underlying pointer."); - std::optional Dist = getPointersDiff( + std::optional Dist = getPointersDiff( LI->getType(), LI->getPointerOperand(), Data.front().first->getType(), Data.front().first->getPointerOperand(), DL, SE, /*StrictCheck=*/true); @@ -8184,11 +8176,11 @@ static void gatherPossiblyVectorizableLoads( } } auto FindMatchingLoads = - [&](ArrayRef> Loads, - SmallVectorImpl>> + [&](ArrayRef> Loads, + SmallVectorImpl>> &GatheredLoads, SetVector &ToAdd, SetVector &Repeated, - int64_t 
&Offset, unsigned &Start) { + int &Offset, unsigned &Start) { if (Loads.empty()) return GatheredLoads.end(); LoadInst *LI = Loads.front().first; @@ -8199,16 +8191,16 @@ static void gatherPossiblyVectorizableLoads( if (LI->getParent() != Data.front().first->getParent() || LI->getType() != Data.front().first->getType()) continue; - std::optional Dist = + std::optional Dist = getPointersDiff(LI->getType(), LI->getPointerOperand(), Data.front().first->getType(), Data.front().first->getPointerOperand(), DL, SE, /*StrictCheck=*/true); if (!Dist) continue; - SmallSet DataDists; + SmallSet DataDists; SmallPtrSet DataLoads; - for (std::pair P : Data) { + for (std::pair P : Data) { DataDists.insert(P.second); DataLoads.insert(P.first); } @@ -8239,10 +8231,10 @@ static void gatherPossiblyVectorizableLoads( ToAdd.clear(); return GatheredLoads.end(); }; - for (ArrayRef> Data : ClusteredLoads) { + for (ArrayRef> Data : ClusteredLoads) { unsigned Start = 0; SetVector ToAdd, LocalToAdd, Repeated; - int64_t Offset = 0; + int Offset = 0; auto *It = FindMatchingLoads(Data, GatheredLoads, LocalToAdd, Repeated, Offset, Start); while (It != GatheredLoads.end()) { @@ -8257,7 +8249,7 @@ static void gatherPossiblyVectorizableLoads( return !ToAdd.contains(Idx) && !Repeated.contains(Idx); })) { auto AddNewLoads = - [&](SmallVectorImpl> &Loads) { + [&](SmallVectorImpl> &Loads) { for (unsigned Idx : seq(Data.size())) { if (ToAdd.contains(Idx) || Repeated.contains(Idx)) continue; @@ -8267,7 +8259,7 @@ static void gatherPossiblyVectorizableLoads( if (!AddNew) { LoadInst *LI = Data.front().first; It = find_if( - GatheredLoads, [&](ArrayRef> PD) { + GatheredLoads, [&](ArrayRef> PD) { return PD.front().first->getParent() == LI->getParent() && PD.front().first->getType() == LI->getType(); }); @@ -8275,7 +8267,7 @@ static void gatherPossiblyVectorizableLoads( AddNewLoads(*It); It = std::find_if( std::next(It), GatheredLoads.end(), - [&](ArrayRef> PD) { + [&](ArrayRef> PD) { return 
PD.front().first->getParent() == LI->getParent() && PD.front().first->getType() == LI->getType(); }); @@ -8288,10 +8280,9 @@ static void gatherPossiblyVectorizableLoads( } void BoUpSLP::tryToVectorizeGatheredLoads( - const SmallMapVector< - std::tuple, - SmallVector>>, 8> - &GatheredLoads) { + const SmallMapVector, + SmallVector>>, + 8> &GatheredLoads) { GatheredLoadsEntriesFirst = VectorizableTree.size(); SmallVector> LoadSetsToVectorize( @@ -8300,8 +8291,8 @@ void BoUpSLP::tryToVectorizeGatheredLoads( Set.insert_range(VectorizableTree[Idx]->Scalars); // Sort loads by distance. - auto LoadSorter = [](const std::pair &L1, - const std::pair &L2) { + auto LoadSorter = [](const std::pair &L1, + const std::pair &L2) { return L1.second > L2.second; }; @@ -8463,30 +8454,28 @@ void BoUpSLP::tryToVectorizeGatheredLoads( }; auto ProcessGatheredLoads = [&, &TTI = *TTI]( - ArrayRef>> GatheredLoads, + ArrayRef>> GatheredLoads, bool Final = false) { SmallVector NonVectorized; - for (ArrayRef> LoadsDists : - GatheredLoads) { + for (ArrayRef> LoadsDists : GatheredLoads) { if (LoadsDists.size() <= 1) { NonVectorized.push_back(LoadsDists.back().first); continue; } - SmallVector> LocalLoadsDists( - LoadsDists); + SmallVector> LocalLoadsDists(LoadsDists); SmallVector OriginalLoads(make_first_range(LoadsDists)); stable_sort(LocalLoadsDists, LoadSorter); SmallVector Loads; unsigned MaxConsecutiveDistance = 0; unsigned CurrentConsecutiveDist = 1; - int64_t LastDist = LocalLoadsDists.front().second; + int LastDist = LocalLoadsDists.front().second; bool AllowMaskedGather = IsMaskedGatherSupported(OriginalLoads); - for (const std::pair &L : LocalLoadsDists) { + for (const std::pair &L : LocalLoadsDists) { if (isVectorized(L.first)) continue; assert(LastDist >= L.second && "Expected first distance always not less than second"); - if (static_cast(LastDist - L.second) == + if (static_cast(LastDist - L.second) == CurrentConsecutiveDist) { ++CurrentConsecutiveDist; MaxConsecutiveDistance = @@ 
-8709,12 +8698,12 @@ void BoUpSLP::tryToVectorizeGatheredLoads( if (!Ref.empty() && !NonVectorized.empty() && std::accumulate( Ref.begin(), Ref.end(), 0u, - [](unsigned S, ArrayRef> LoadsDists) - -> unsigned { return S + LoadsDists.size(); }) != - NonVectorized.size() && + [](unsigned S, + ArrayRef> LoadsDists) -> unsigned { + return S + LoadsDists.size(); + }) != NonVectorized.size() && IsMaskedGatherSupported(NonVectorized)) { - SmallVector>> - FinalGatheredLoads; + SmallVector>> FinalGatheredLoads; for (LoadInst *LI : NonVectorized) { // Reinsert non-vectorized loads to other list of loads with the same // base pointers. @@ -9310,10 +9299,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( Ptr0 = PointerOps[CurrentOrder.front()]; PtrN = PointerOps[CurrentOrder.back()]; } - std::optional Dist = + std::optional Dist = getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE); // Check that the sorted pointer operands are consecutive. - if (static_cast(*Dist) == VL.size() - 1) + if (static_cast(*Dist) == VL.size() - 1) return TreeEntry::Vectorize; } @@ -10223,7 +10212,7 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, assert((allConstant(VLRef) || allSameType(VLRef)) && "Invalid types!"); SmallVector ReuseShuffleIndices; - SmallVector VL(VLRef); + SmallVector VL(VLRef.begin(), VLRef.end()); // Tries to build split node. auto TrySplitNode = [&](const InstructionsState &LocalState) { @@ -10762,7 +10751,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const { if (!isValidElementType(EltTy)) return 0; - size_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N)); + uint64_t VTSize = DL->getTypeStoreSizeInBits(getWidenedType(EltTy, N)); if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL->getTypeStoreSizeInBits(T)) return 0; @@ -11961,7 +11950,7 @@ void BoUpSLP::transformNodes() { // A list of loads to be gathered during the vectorization process. We can // try to vectorize them at the end, if profitable. 
SmallMapVector, - SmallVector>>, 8> + SmallVector>>, 8> GatheredLoads; for (std::unique_ptr &TE : VectorizableTree) { @@ -13617,7 +13606,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, unsigned InterleaveFactor; SmallVector CompressMask; VectorType *LoadVecTy; - SmallVector Scalars(VL); + SmallVector Scalars(VL.begin(), VL.end()); if (!E->ReorderIndices.empty()) { SmallVector Mask(E->ReorderIndices.begin(), E->ReorderIndices.end()); @@ -15987,10 +15976,9 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef VL, bool ForPoisonSrc, } Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { - auto It = EntryToLastInstruction.find(E); - if (It != EntryToLastInstruction.end()) - return *cast(It->second); - Instruction *Res = nullptr; + auto &Res = EntryToLastInstruction.try_emplace(E).first->second; + if (Res) + return *Res; // Get the basic block this bundle is in. All instructions in the bundle // should be in this block (except for extractelement-like instructions with // constant indices or gathered loads). 
@@ -16095,11 +16083,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { auto *I = dyn_cast_or_null(E->VectorizedValue); if (!I) I = &getLastInstructionInBundle(E); - if (Res->getParent() == I->getParent() && Res->comesBefore(I)) + if (Res->comesBefore(I)) Res = I; } } - EntryToLastInstruction.try_emplace(E, Res); return *Res; } @@ -16108,7 +16095,6 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { E->Idx >= *GatheredLoadsEntriesFirst && !E->isGather() && E->getOpcode() == Instruction::Load) { Res = FindFirstInst(); - EntryToLastInstruction.try_emplace(E, Res); return *Res; } @@ -16155,7 +16141,6 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { Res = FindLastInst(); else Res = FindFirstInst(); - EntryToLastInstruction.try_emplace(E, Res); return *Res; } @@ -16166,7 +16151,6 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { if (Bundle) { assert(!E->isGather() && "Gathered instructions should not be scheduled"); Res = Bundle->getBundle().back()->getInst(); - EntryToLastInstruction.try_emplace(E, Res); return *Res; } @@ -16191,7 +16175,6 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) { if (!Res) Res = FindLastInst(); assert(Res && "Failed to find last instruction in bundle"); - EntryToLastInstruction.try_emplace(E, Res); return *Res; } @@ -18281,13 +18264,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *Ptr0 = cast(E->Scalars.front())->getPointerOperand(); Value *PtrN = cast(E->Scalars.back())->getPointerOperand(); PO = IsReverseOrder ? 
PtrN : Ptr0; - std::optional Diff = getPointersDiff( + std::optional Diff = getPointersDiff( VL0->getType(), Ptr0, VL0->getType(), PtrN, *DL, *SE); Type *StrideTy = DL->getIndexType(PO->getType()); Value *StrideVal; if (Diff) { - int64_t Stride = - *Diff / (static_cast(E->Scalars.size()) - 1); + int Stride = *Diff / (static_cast(E->Scalars.size()) - 1); StrideVal = ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride * DL->getTypeAllocSize(ScalarTy)); @@ -21145,18 +21127,18 @@ class RelatedStoreInsts { /// \p PtrDist. /// Does nothing if there is already a store with that \p PtrDist. /// \returns The previously associated Instruction index, or std::nullopt - std::optional insertOrLookup(unsigned InstrIdx, int64_t PtrDist) { + std::optional insertOrLookup(unsigned InstrIdx, int PtrDist) { auto [It, Inserted] = Instrs.emplace(PtrDist, InstrIdx); - return Inserted ? std::nullopt : std::make_optional(It->second); + return Inserted ? std::nullopt : std::optional(It->second); } - using DistToInstMap = std::map; + using DistToInstMap = std::map; const DistToInstMap &getStores() const { return Instrs; } /// If \p SI is related to this group of stores, return the distance of its /// pointer operand to the one the group's BaseInstr. - std::optional getPointerDiff(StoreInst &SI, const DataLayout &DL, - ScalarEvolution &SE) const { + std::optional getPointerDiff(StoreInst &SI, const DataLayout &DL, + ScalarEvolution &SE) const { StoreInst &BaseStore = *AllStores[BaseInstrIdx]; return getPointersDiff( BaseStore.getValueOperand()->getType(), BaseStore.getPointerOperand(), @@ -21167,7 +21149,7 @@ class RelatedStoreInsts { /// Recompute the pointer distances to be based on \p NewBaseInstIdx. /// Stores whose index is less than \p MinSafeIdx will be dropped. 
void rebase(unsigned MinSafeIdx, unsigned NewBaseInstIdx, - int64_t DistFromCurBase) { + int DistFromCurBase) { DistToInstMap PrevSet = std::move(Instrs); reset(NewBaseInstIdx); @@ -21183,7 +21165,7 @@ class RelatedStoreInsts { /// Remove all stores that have been vectorized from this group. void clearVectorizedStores(const BoUpSLP::ValueSet &VectorizedStores) { DistToInstMap::reverse_iterator LastVectorizedStore = find_if( - reverse(Instrs), [&](const std::pair &DistAndIdx) { + reverse(Instrs), [&](const std::pair &DistAndIdx) { return VectorizedStores.contains(AllStores[DistAndIdx.second]); }); @@ -21216,7 +21198,7 @@ bool SLPVectorizerPass::vectorizeStores( bool Changed = false; auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) { - int64_t PrevDist = -1; + int PrevDist = -1; BoUpSLP::ValueList Operands; // Collect the chain into a list. for (auto [Idx, Data] : enumerate(StoreSeq)) { @@ -21517,7 +21499,7 @@ bool SLPVectorizerPass::vectorizeStores( // dependencies and no need to waste compile time to try to vectorize them. // - Try to vectorize the sequence {1, {1, 0}, {3, 2}}. auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) { - std::optional PtrDist; + std::optional PtrDist; auto *RelatedStores = find_if( SortedStores, [&PtrDist, SI, this](const RelatedStoreInsts &StoreSeq) { PtrDist = StoreSeq.getPointerDiff(*SI, *DL, *SE); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5fd7a369bf735..2c4cac7655ec9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1135,9 +1135,7 @@ class VPPhiAccessors { const VPBasicBlock *getIncomingBlock(unsigned Idx) const; /// Returns the number of incoming values, also number of incoming blocks. 
- virtual unsigned getNumIncoming() const { - return getAsRecipe()->getNumOperands(); - } + unsigned getNumIncoming() const { return getAsRecipe()->getNumOperands(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. @@ -1236,7 +1234,7 @@ class VPIRInstruction : public VPRecipeBase { /// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is /// allowed, and it is used to add a new incoming value for the single /// predecessor VPBB. -struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors { +struct VPIRPhi : public VPIRInstruction { VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {} static inline bool classof(const VPRecipeBase *U) { @@ -1253,9 +1251,6 @@ struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors { void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif - -protected: - const VPRecipeBase *getAsRecipe() const override { return this; } }; /// Helper to manage IR metadata for recipes. It filters out metadata that @@ -1790,15 +1785,13 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags, /// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a /// pointer induction. Produces either a vector PHI per-part or scalar values /// per-lane based on the canonical induction. -class VPHeaderPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors { +class VPHeaderPHIRecipe : public VPSingleDefRecipe { protected: VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL = {}) : VPSingleDefRecipe(VPDefID, ArrayRef({Start}), UnderlyingInstr, DL) { } - const VPRecipeBase *getAsRecipe() const override { return this; } - public: ~VPHeaderPHIRecipe() override = default; @@ -1987,11 +1980,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr; } - /// Returns the number of incoming values, also number of incoming blocks. 
- /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single - /// incoming value, its start value. - unsigned getNumIncoming() const override { return 1; } - /// Returns the first defined value as TruncInst, if it is one or nullptr /// otherwise. TruncInst *getTruncInst() { return Trunc; } @@ -3295,46 +3283,6 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, } }; -/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe -/// types implementing VPPhiAccessors. Used by isa<> & co. -template <> struct CastIsPossible { - static inline bool isPossible(const VPRecipeBase *f) { - // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors. - return isa(f); - } -}; -/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the -/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co. -template <> -struct CastInfo - : public CastIsPossible { - - using Self = CastInfo; - - /// doCast is used by cast<>. - static inline VPPhiAccessors *doCast(const VPRecipeBase *R) { - return const_cast([R]() -> const VPPhiAccessors * { - switch (R->getVPDefID()) { - case VPDef::VPInstructionSC: - return cast(R); - case VPDef::VPIRInstructionSC: - return cast(R); - case VPDef::VPWidenPHISC: - return cast(R); - default: - return cast(R); - } - }()); - } - - /// doCastIfPossible is used by dyn_cast<>. - static inline VPPhiAccessors *doCastIfPossible(const VPRecipeBase *f) { - if (!Self::isPossible(f)) - return nullptr; - return doCast(f); - } -}; - /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It /// holds a sequence of zero or more VPRecipe's each representing a sequence of /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 1e7e039a18d56..b8205545a4f5e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -192,7 +192,8 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { if (!verifyPhiRecipes(VPBB)) return false; - // Verify that defs in VPBB dominate all their uses. + // Verify that defs in VPBB dominate all their uses. The current + // implementation is still incomplete. DenseMap RecipeNumbering; unsigned Cnt = 0; for (const VPRecipeBase &R : *VPBB) @@ -219,31 +220,12 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { for (const VPUser *U : V->users()) { auto *UI = cast(U); - if (auto *Phi = dyn_cast(UI)) { - for (unsigned Idx = 0; Idx != Phi->getNumIncoming(); ++Idx) { - VPValue *IncomingVPV = Phi->getIncomingValue(Idx); - if (IncomingVPV != V) - continue; - - const VPBasicBlock *IncomingVPBB = Phi->getIncomingBlock(Idx); - if (VPDT.dominates(VPBB, IncomingVPBB)) - continue; - - errs() << "Incoming def at index " << Idx - << " does not dominate incoming block!\n"; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - VPSlotTracker Tracker(VPBB->getPlan()); - IncomingVPV->getDefiningRecipe()->print(errs(), " ", Tracker); - errs() << "\n does not dominate " << IncomingVPBB->getName() - << " for\n"; - UI->print(errs(), " ", Tracker); -#endif - return false; - } - continue; - } - // TODO: Also verify VPPredInstPHIRecipe. - if (isa(UI)) + // TODO: check dominance of incoming values for phis properly. 
+ if (!UI || + isa(UI) || + (isa(UI) && + cast(UI)->getOpcode() == Instruction::PHI)) continue; // If the user is in the same block, check it comes after R in the diff --git a/llvm/test/Assembler/autoupgrade-thread-pointer.ll b/llvm/test/Assembler/autoupgrade-thread-pointer.ll index 178e31f50b1bf..b1ed15a7e4ef8 100644 --- a/llvm/test/Assembler/autoupgrade-thread-pointer.ll +++ b/llvm/test/Assembler/autoupgrade-thread-pointer.ll @@ -6,14 +6,14 @@ declare ptr @llvm.arm.thread.pointer() define ptr @test1() { ; CHECK-LABEL: define ptr @test1() -; CHECK: call ptr @llvm.thread.pointer.p0() +; CHECK: call ptr @llvm.thread.pointer() %1 = call ptr @llvm.aarch64.thread.pointer() ret ptr %1 } define ptr @test2() { ; CHECK-LABEL: define ptr @test2() -; CHECK: call ptr @llvm.thread.pointer.p0() +; CHECK: call ptr @llvm.thread.pointer() %1 = call ptr @llvm.arm.thread.pointer() ret ptr %1 } diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll b/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll deleted file mode 100644 index f7182e2a166a5..0000000000000 --- a/llvm/test/CodeGen/AArch64/aarch64-sme-stubs.ll +++ /dev/null @@ -1,47 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s - -; Checks SME ABI routines can be implemented as stubs without +sme. 
- -define i1 @__aarch64_sme_accessible() { -; CHECK-LABEL: __aarch64_sme_accessible: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w0, #1 // =0x1 -; CHECK-NEXT: ret -entry: - ret i1 true -} - -define [2 x i64] @__arm_sme_state() { -; CHECK-LABEL: __arm_sme_state: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: ret -entry: - ret [2 x i64] zeroinitializer -} - -define void @__arm_tpidr2_restore() { -; CHECK-LABEL: __arm_tpidr2_restore: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ret -entry: - ret void -} - -define void @__arm_tpidr2_save() { -; CHECK-LABEL: __arm_tpidr2_save: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ret -entry: - ret void -} - -define void @__arm_za_disable() { -; CHECK-LABEL: __arm_za_disable: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ret -entry: - ret void -} diff --git a/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll b/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll index 07c4dbcf41096..e2d530ab421ef 100644 --- a/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll +++ b/llvm/test/CodeGen/AArch64/arm64-arith-saturating.ll @@ -193,7 +193,7 @@ define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone { define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) { ; CHECK-LABEL: uqxtn_ext: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov d0, v3.d[1] +; CHECK-NEXT: mov v0.d[0], v3.d[1] ; CHECK-NEXT: uqxtn s0, d0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -219,7 +219,7 @@ entry: define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) { ; CHECK-LABEL: sqxtun_insext: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: mov v1.d[0], v1.d[1] ; CHECK-NEXT: sqxtun s1, d1 ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index a7f9ca8d73c1f..2f543cc324bc2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -95,7 +95,6 @@ ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4h ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli2s ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli1d_imm0 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli16b ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli8h ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sli4s @@ -4089,16 +4088,6 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind { ret <1 x i64> %tmp3 } -; Ensure we can select scalar SLI with a zero shift (see issue #139879). -define <1 x i64> @sli1d_imm0(<1 x i64> %a, <1 x i64> %b) { -; CHECK-LABEL: sli1d_imm0: -; CHECK: // %bb.0: -; CHECK-NEXT: sli d0, d1, #0 -; CHECK-NEXT: ret - %r = call <1 x i64> @llvm.aarch64.neon.vsli(<1 x i64> %a, <1 x i64> %b, i32 0) - ret <1 x i64> %r -} - define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: sli16b: ; CHECK: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll index 3133d0efb4b9b..33238ccf86a39 100644 --- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll +++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll @@ -70,8 +70,8 @@ define <4 x i64> @z_i32_v4i64(i32 %x) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s0, w0 ; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff -; CHECK-SD-NEXT: mov b2, v0.b[0] -; CHECK-SD-NEXT: mov b3, v0.b[2] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] ; CHECK-SD-NEXT: mov v2.b[4], v0.b[1] ; CHECK-SD-NEXT: mov v3.b[4], v0.b[3] ; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0 @@ -172,8 +172,8 @@ define <4 x i64> @s_i32_v4i64(i32 %x) { ; CHECK-SD-LABEL: s_i32_v4i64: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: mov b1, v0.b[0] -; CHECK-SD-NEXT: mov b2, 
v0.b[2] +; CHECK-SD-NEXT: mov v1.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[2] ; CHECK-SD-NEXT: mov v1.b[4], v0.b[1] ; CHECK-SD-NEXT: mov v2.b[4], v0.b[3] ; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0 diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll index 65da95e0163f4..e90b6cb7f809b 100644 --- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll +++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll @@ -5,7 +5,7 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECKLE-LABEL: test_reconstructshuffle: ; CHECKLE: // %bb.0: -; CHECKLE-NEXT: mov b2, v0.b[3] +; CHECKLE-NEXT: mov v2.b[0], v0.b[3] ; CHECKLE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 ; CHECKLE-NEXT: mov v2.b[2], v0.b[2] ; CHECKLE-NEXT: mov v2.b[4], v0.b[1] @@ -21,7 +21,7 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind { ; CHECKBE-NEXT: rev64 v1.16b, v1.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: mov b2, v0.b[3] +; CHECKBE-NEXT: mov v2.b[0], v0.b[3] ; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 ; CHECKBE-NEXT: mov v2.b[2], v0.b[2] ; CHECKBE-NEXT: mov v2.b[4], v0.b[1] diff --git a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll index 05422d3cc6051..97c3a4937cda7 100644 --- a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll @@ -347,8 +347,9 @@ define half @get_lane_64(<4 x half> %a) #0 { ; CHECK-LABEL: get_lane_64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-NEXT: ret entry: %0 = bitcast <4 x half> %a to <4 x i16> @@ -361,8 +362,9 @@ entry: define 
half @get_lane_128(<8 x half> %a) #0 { ; CHECK-LABEL: get_lane_128: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-NEXT: ret entry: %0 = bitcast <8 x half> %a to <8 x i16> diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index 34858940370e9..fb2bdb4d63f47 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -3443,10 +3443,10 @@ define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) { ; CHECK-SD-LABEL: stofp_v8i8_v8f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: mov b1, v0.b[0] -; CHECK-SD-NEXT: mov b2, v0.b[2] -; CHECK-SD-NEXT: mov b3, v0.b[4] -; CHECK-SD-NEXT: mov b4, v0.b[6] +; CHECK-SD-NEXT: mov v1.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[6] ; CHECK-SD-NEXT: mov v1.b[4], v0.b[1] ; CHECK-SD-NEXT: mov v2.b[4], v0.b[3] ; CHECK-SD-NEXT: mov v3.b[4], v0.b[5] @@ -3492,10 +3492,10 @@ define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) { ; CHECK-SD-LABEL: utofp_v8i8_v8f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: mov b2, v0.b[0] -; CHECK-SD-NEXT: mov b3, v0.b[2] -; CHECK-SD-NEXT: mov b4, v0.b[4] -; CHECK-SD-NEXT: mov b5, v0.b[6] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v5.b[0], v0.b[6] ; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff ; CHECK-SD-NEXT: mov v2.b[4], v0.b[1] ; CHECK-SD-NEXT: mov v3.b[4], v0.b[3] @@ -3538,14 +3538,14 @@ define <16 x double> @stofp_v16i8_v16f64(<16 x i8> %a) { ; CHECK-SD-LABEL: stofp_v16i8_v16f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: 
mov b2, v0.b[0] -; CHECK-SD-NEXT: mov b3, v0.b[2] -; CHECK-SD-NEXT: mov b4, v0.b[4] -; CHECK-SD-NEXT: mov b5, v0.b[6] -; CHECK-SD-NEXT: mov b6, v1.b[0] -; CHECK-SD-NEXT: mov b7, v1.b[2] -; CHECK-SD-NEXT: mov b16, v1.b[4] -; CHECK-SD-NEXT: mov b17, v1.b[6] +; CHECK-SD-NEXT: mov v2.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v5.b[0], v0.b[6] +; CHECK-SD-NEXT: mov v6.b[0], v1.b[0] +; CHECK-SD-NEXT: mov v7.b[0], v1.b[2] +; CHECK-SD-NEXT: mov v16.b[0], v1.b[4] +; CHECK-SD-NEXT: mov v17.b[0], v1.b[6] ; CHECK-SD-NEXT: mov v2.b[4], v0.b[1] ; CHECK-SD-NEXT: mov v3.b[4], v0.b[3] ; CHECK-SD-NEXT: mov v4.b[4], v0.b[5] @@ -3622,15 +3622,15 @@ define <16 x double> @utofp_v16i8_v16f64(<16 x i8> %a) { ; CHECK-SD-LABEL: utofp_v16i8_v16f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: mov b3, v0.b[0] -; CHECK-SD-NEXT: mov b4, v0.b[2] -; CHECK-SD-NEXT: mov b5, v0.b[4] -; CHECK-SD-NEXT: mov b6, v0.b[6] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v4.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v5.b[0], v0.b[4] +; CHECK-SD-NEXT: mov v6.b[0], v0.b[6] ; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff -; CHECK-SD-NEXT: mov b7, v2.b[0] -; CHECK-SD-NEXT: mov b16, v2.b[2] -; CHECK-SD-NEXT: mov b17, v2.b[4] -; CHECK-SD-NEXT: mov b18, v2.b[6] +; CHECK-SD-NEXT: mov v7.b[0], v2.b[0] +; CHECK-SD-NEXT: mov v16.b[0], v2.b[2] +; CHECK-SD-NEXT: mov v17.b[0], v2.b[4] +; CHECK-SD-NEXT: mov v18.b[0], v2.b[6] ; CHECK-SD-NEXT: mov v3.b[4], v0.b[1] ; CHECK-SD-NEXT: mov v4.b[4], v0.b[3] ; CHECK-SD-NEXT: mov v5.b[4], v0.b[5] @@ -3699,18 +3699,18 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) { ; CHECK-SD-LABEL: stofp_v32i8_v32f64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: mov b5, v1.b[6] -; CHECK-SD-NEXT: mov b17, v1.b[4] -; CHECK-SD-NEXT: mov b20, v1.b[2] -; CHECK-SD-NEXT: mov b21, v1.b[0] -; CHECK-SD-NEXT: mov b18, 
v0.b[0] -; CHECK-SD-NEXT: mov b19, v0.b[6] -; CHECK-SD-NEXT: mov b22, v0.b[4] +; CHECK-SD-NEXT: mov v5.b[0], v1.b[6] +; CHECK-SD-NEXT: mov v17.b[0], v1.b[4] +; CHECK-SD-NEXT: mov v20.b[0], v1.b[2] +; CHECK-SD-NEXT: mov v21.b[0], v1.b[0] +; CHECK-SD-NEXT: mov v18.b[0], v0.b[0] +; CHECK-SD-NEXT: mov v19.b[0], v0.b[6] +; CHECK-SD-NEXT: mov v22.b[0], v0.b[4] ; CHECK-SD-NEXT: ext v16.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov b2, v3.b[0] -; CHECK-SD-NEXT: mov b4, v3.b[2] -; CHECK-SD-NEXT: mov b6, v3.b[4] -; CHECK-SD-NEXT: mov b7, v3.b[6] +; CHECK-SD-NEXT: mov v2.b[0], v3.b[0] +; CHECK-SD-NEXT: mov v4.b[0], v3.b[2] +; CHECK-SD-NEXT: mov v6.b[0], v3.b[4] +; CHECK-SD-NEXT: mov v7.b[0], v3.b[6] ; CHECK-SD-NEXT: mov v5.b[4], v1.b[7] ; CHECK-SD-NEXT: mov v17.b[4], v1.b[5] ; CHECK-SD-NEXT: mov v20.b[4], v1.b[3] @@ -3718,16 +3718,16 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) { ; CHECK-SD-NEXT: mov v19.b[4], v0.b[7] ; CHECK-SD-NEXT: mov v22.b[4], v0.b[5] ; CHECK-SD-NEXT: mov v18.b[4], v0.b[1] -; CHECK-SD-NEXT: mov b23, v16.b[0] +; CHECK-SD-NEXT: mov v23.b[0], v16.b[0] ; CHECK-SD-NEXT: mov v2.b[4], v3.b[1] ; CHECK-SD-NEXT: mov v4.b[4], v3.b[3] ; CHECK-SD-NEXT: mov v6.b[4], v3.b[5] ; CHECK-SD-NEXT: mov v7.b[4], v3.b[7] -; CHECK-SD-NEXT: mov b3, v0.b[2] +; CHECK-SD-NEXT: mov v3.b[0], v0.b[2] ; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24 ; CHECK-SD-NEXT: shl v17.2s, v17.2s, #24 ; CHECK-SD-NEXT: shl v20.2s, v20.2s, #24 -; CHECK-SD-NEXT: mov b24, v16.b[4] +; CHECK-SD-NEXT: mov v24.b[0], v16.b[4] ; CHECK-SD-NEXT: mov v23.b[4], v16.b[1] ; CHECK-SD-NEXT: shl v18.2s, v18.2s, #24 ; CHECK-SD-NEXT: shl v19.2s, v19.2s, #24 @@ -3739,10 +3739,10 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) { ; CHECK-SD-NEXT: shl v0.2s, v21.2s, #24 ; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24 ; CHECK-SD-NEXT: shl v6.2s, v7.2s, #24 -; CHECK-SD-NEXT: mov b7, v16.b[2] +; CHECK-SD-NEXT: mov v7.b[0], v16.b[2] ; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0 ; CHECK-SD-NEXT: sshr v20.2s, v20.2s, #24 -; 
CHECK-SD-NEXT: mov b21, v16.b[6] +; CHECK-SD-NEXT: mov v21.b[0], v16.b[6] ; CHECK-SD-NEXT: sshll v17.2d, v17.2s, #0 ; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-SD-NEXT: shl v22.2s, v22.2s, #24 @@ -3869,25 +3869,25 @@ entry: define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) { ; CHECK-SD-LABEL: utofp_v32i8_v32f64: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov b6, v1.b[6] -; CHECK-SD-NEXT: mov b7, v1.b[4] +; CHECK-SD-NEXT: mov v6.b[0], v1.b[6] +; CHECK-SD-NEXT: mov v7.b[0], v1.b[4] ; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-SD-NEXT: mov b16, v1.b[2] -; CHECK-SD-NEXT: mov b17, v1.b[0] -; CHECK-SD-NEXT: mov b19, v0.b[6] -; CHECK-SD-NEXT: mov b20, v0.b[4] +; CHECK-SD-NEXT: mov v16.b[0], v1.b[2] +; CHECK-SD-NEXT: mov v17.b[0], v1.b[0] +; CHECK-SD-NEXT: mov v19.b[0], v0.b[6] +; CHECK-SD-NEXT: mov v20.b[0], v0.b[4] ; CHECK-SD-NEXT: movi d5, #0x0000ff000000ff -; CHECK-SD-NEXT: mov b24, v0.b[2] -; CHECK-SD-NEXT: mov b25, v0.b[0] +; CHECK-SD-NEXT: mov v24.b[0], v0.b[2] +; CHECK-SD-NEXT: mov v25.b[0], v0.b[0] ; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-SD-NEXT: mov v6.b[4], v1.b[7] ; CHECK-SD-NEXT: mov v7.b[4], v1.b[5] -; CHECK-SD-NEXT: mov b18, v3.b[0] -; CHECK-SD-NEXT: mov b21, v3.b[2] -; CHECK-SD-NEXT: mov b23, v3.b[4] +; CHECK-SD-NEXT: mov v18.b[0], v3.b[0] +; CHECK-SD-NEXT: mov v21.b[0], v3.b[2] +; CHECK-SD-NEXT: mov v23.b[0], v3.b[4] ; CHECK-SD-NEXT: mov v16.b[4], v1.b[3] ; CHECK-SD-NEXT: mov v17.b[4], v1.b[1] -; CHECK-SD-NEXT: mov b1, v3.b[6] +; CHECK-SD-NEXT: mov v1.b[0], v3.b[6] ; CHECK-SD-NEXT: mov v19.b[4], v0.b[7] ; CHECK-SD-NEXT: mov v20.b[4], v0.b[5] ; CHECK-SD-NEXT: mov v24.b[4], v0.b[3] @@ -3905,15 +3905,15 @@ define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) { ; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0 ; CHECK-SD-NEXT: and v20.8b, v20.8b, v5.8b ; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0 -; CHECK-SD-NEXT: mov b4, v2.b[0] -; CHECK-SD-NEXT: mov b22, v2.b[2] +; CHECK-SD-NEXT: mov v4.b[0], v2.b[0] +; CHECK-SD-NEXT: mov 
v22.b[0], v2.b[2] ; CHECK-SD-NEXT: ushll v17.2d, v17.2s, #0 ; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0 -; CHECK-SD-NEXT: mov b19, v2.b[4] +; CHECK-SD-NEXT: mov v19.b[0], v2.b[4] ; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d ; CHECK-SD-NEXT: ucvtf v3.2d, v7.2d ; CHECK-SD-NEXT: ushll v20.2d, v20.2s, #0 -; CHECK-SD-NEXT: mov b7, v2.b[6] +; CHECK-SD-NEXT: mov v7.b[0], v2.b[6] ; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d ; CHECK-SD-NEXT: and v24.8b, v24.8b, v5.8b ; CHECK-SD-NEXT: ucvtf v17.2d, v17.2d diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll index c6aa8701e1721..c039da26b7c15 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll @@ -555,7 +555,7 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) { ; CHECK-LE-LABEL: bitcast_i16_to_v2i8: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: fmov s1, w0 -; CHECK-LE-NEXT: mov b0, v1.b[0] +; CHECK-LE-NEXT: mov v0.b[0], v1.b[0] ; CHECK-LE-NEXT: mov v0.b[4], v1.b[1] ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret @@ -564,7 +564,7 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) { ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: fmov s0, w0 ; CHECK-BE-NEXT: rev16 v0.16b, v0.16b -; CHECK-BE-NEXT: mov b1, v0.b[0] +; CHECK-BE-NEXT: mov v1.b[0], v0.b[0] ; CHECK-BE-NEXT: mov v1.b[4], v0.b[1] ; CHECK-BE-NEXT: rev64 v0.2s, v1.2s ; CHECK-BE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll index bfdf794c1c27a..0f4eec4fdfda1 100644 --- a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll +++ b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll @@ -360,7 +360,8 @@ define <4 x i32> @test_q_lane4_nxv4i32(<4 x i32> %a, %b) { define <1 x double> @test_lane0_nxv2f64(<1 x double> %a, %b) { ; CHECK-LABEL: test_lane0_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d0, v1.d[0] +; CHECK-NEXT: mov v0.d[0], v1.d[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %c = 
extractelement %b, i32 0 %d = insertelement <1 x double> %a, double %c, i32 0 @@ -370,7 +371,8 @@ define <1 x double> @test_lane0_nxv2f64(<1 x double> %a, % define <1 x double> @test_lane1_nxv2f64(<1 x double> %a, %b) { ; CHECK-LABEL: test_lane1_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d0, v1.d[1] +; CHECK-NEXT: mov v0.d[0], v1.d[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %c = extractelement %b, i32 1 %d = insertelement <1 x double> %a, double %c, i32 0 @@ -414,7 +416,8 @@ define <2 x double> @test_q_lane2_nxv2f64(<2 x double> %a, define <1 x i64> @test_lane0_nxv2i64(<1 x i64> %a, %b) { ; CHECK-LABEL: test_lane0_nxv2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d0, v1.d[0] +; CHECK-NEXT: mov v0.d[0], v1.d[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %c = extractelement %b, i32 0 %d = insertelement <1 x i64> %a, i64 %c, i32 0 @@ -424,7 +427,8 @@ define <1 x i64> @test_lane0_nxv2i64(<1 x i64> %a, %b) { define <1 x i64> @test_lane1_nxv2i64(<1 x i64> %a, %b) { ; CHECK-LABEL: test_lane1_nxv2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d0, v1.d[1] +; CHECK-NEXT: mov v0.d[0], v1.d[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %c = extractelement %b, i32 1 %d = insertelement <1 x i64> %a, i64 %c, i32 0 diff --git a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll index d9cdbc2f92aca..ebff3f1370040 100644 --- a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll +++ b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll @@ -89,58 +89,11 @@ entry: } -define half @test_vext_v8i16(<8 x i16> %a) { -; CHECK-LABEL: test_vext_v8i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov h0, v0.h[5] -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: ret -entry: - %b = extractelement <8 x i16> %a, i32 5 - %c = bitcast i16 %b to half - ret half %c -} - -define half @test_vext_v8i16_0(<8 x i16> %a) { -; CHECK-LABEL: test_vext_v8i16_0: -; 
CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: ret -entry: - %b = extractelement <8 x i16> %a, i32 0 - %c = bitcast i16 %b to half - ret half %c -} - -define half @test_vext_v4i16(<4 x i16> %a) { -; CHECK-LABEL: test_vext_v4i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: ret -entry: - %b = extractelement <4 x i16> %a, i32 1 - %c = bitcast i16 %b to half - ret half %c -} - -define half @test_vext_v4i16_0(<4 x i16> %a) { -; CHECK-LABEL: test_vext_v4i16_0: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: ret -entry: - %b = extractelement <4 x i16> %a, i32 0 - %c = bitcast i16 %b to half - ret half %c -} - define float @test_vext_v4i32(<4 x i32> %a) { ; CHECK-LABEL: test_vext_v4i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov s0, v0.s[3] +; CHECK-NEXT: mov v0.s[0], v0.s[3] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: ret entry: %b = extractelement <4 x i32> %a, i32 3 @@ -163,7 +116,8 @@ define float @test_vext_v2i32(<2 x i32> %a) { ; CHECK-LABEL: test_vext_v2i32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: mov v0.s[0], v0.s[1] +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: ret entry: %b = extractelement <2 x i32> %a, i32 1 @@ -186,7 +140,8 @@ entry: define double @test_vext_v2i64(<2 x i64> %a) { ; CHECK-LABEL: test_vext_v2i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: mov v0.d[0], v0.d[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %b = extractelement <2 x i64> %a, i32 1 diff --git a/llvm/test/CodeGen/AArch64/nofpclass.ll b/llvm/test/CodeGen/AArch64/nofpclass.ll deleted file mode 100644 index 
3139aa0ef0bf6..0000000000000 --- a/llvm/test/CodeGen/AArch64/nofpclass.ll +++ /dev/null @@ -1,182 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=aarch64-linux-gnu < %s | FileCheck %s - -define float @f(float nofpclass(nan) %a, float nofpclass(nan) %b) { -; CHECK-LABEL: f: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: ret -entry: - %cond = tail call float @llvm.maximumnum.f32(float %a, float %b) - ret float %cond -} - -define <4 x float> @fv4f32(<4 x float> nofpclass(nan) %a, <4 x float> nofpclass(nan) %b) { -; CHECK-LABEL: fv4f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ret -entry: - %c = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b) - ret <4 x float> %c -} - -define {float, float} @m({float, float} nofpclass(nan) %a0, {float, float} nofpclass(nan) %a1) { -; CHECK-LABEL: m: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm s1, s1, s3 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: ret -entry: - %a0f0 = extractvalue {float, float} %a0, 0 - %a0f1 = extractvalue {float, float} %a0, 1 - %a1f0 = extractvalue {float, float} %a1, 0 - %a1f1 = extractvalue {float, float} %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue {float, float} poison, float %max0, 0 - %ret1 = insertvalue {float, float} %ret0, float %max1, 1 - ret {float, float} %ret1 -} - -define [2 x float] @mA([2 x float] nofpclass(nan) %a0, [2 x float] nofpclass(nan) %a1) { -; CHECK-LABEL: mA: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm s1, s1, s3 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: ret -entry: - %a0f0 = extractvalue [2 x float] %a0, 0 - %a0f1 = extractvalue [2 x float] %a0, 1 - %a1f0 = extractvalue [2 x float] %a1, 0 - %a1f1 = extractvalue [2 x float] %a1, 1 - %max0 = tail call 
float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue [2 x float] poison, float %max0, 0 - %ret1 = insertvalue [2 x float] %ret0, float %max1, 1 - ret [2 x float] %ret1 -} - -define float @fS(float nofpclass(snan) %a, float nofpclass(snan) %b) { -; CHECK-LABEL: fS: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: ret -entry: - %cond = tail call float @llvm.maximumnum.f32(float %a, float %b) - ret float %cond -} - -define <4 x float> @fSv4f32(<4 x float> nofpclass(snan) %a, <4 x float> nofpclass(snan) %b) { -; CHECK-LABEL: fSv4f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ret -entry: - %c = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b) - ret <4 x float> %c -} - -define {float, float} @mS({float, float} nofpclass(snan) %a0, {float, float} nofpclass(snan) %a1) { -; CHECK-LABEL: mS: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm s1, s1, s3 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: ret -entry: - %a0f0 = extractvalue {float, float} %a0, 0 - %a0f1 = extractvalue {float, float} %a0, 1 - %a1f0 = extractvalue {float, float} %a1, 0 - %a1f1 = extractvalue {float, float} %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue {float, float} poison, float %max0, 0 - %ret1 = insertvalue {float, float} %ret0, float %max1, 1 - ret {float, float} %ret1 -} - -define [2 x float] @mAS([2 x float] nofpclass(snan) %a0, [2 x float] nofpclass(snan) %a1) { -; CHECK-LABEL: mAS: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmaxnm s1, s1, s3 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: ret -entry: - %a0f0 = extractvalue [2 x float] %a0, 0 - %a0f1 = extractvalue [2 x float] %a0, 1 - %a1f0 = extractvalue [2 x float] %a1, 0 - %a1f1 = extractvalue [2 x float] %a1, 1 - %max0 = 
tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue [2 x float] poison, float %max0, 0 - %ret1 = insertvalue [2 x float] %ret0, float %max1, 1 - ret [2 x float] %ret1 -} - -define float @fQ(float nofpclass(qnan) %a, float nofpclass(qnan) %b) { -; CHECK-LABEL: fQ: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fminnm s1, s1, s1 -; CHECK-NEXT: fminnm s0, s0, s0 -; CHECK-NEXT: fmaxnm s0, s0, s1 -; CHECK-NEXT: ret -entry: - %cond = tail call float @llvm.maximumnum.f32(float %a, float %b) - ret float %cond -} - -define <4 x float> @fQv4f32(<4 x float> nofpclass(qnan) %a, <4 x float> nofpclass(qnan) %b) { -; CHECK-LABEL: fQv4f32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fminnm v1.4s, v1.4s, v1.4s -; CHECK-NEXT: fminnm v0.4s, v0.4s, v0.4s -; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ret -entry: - %c = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b) - ret <4 x float> %c -} - -define {float, float} @mQ({float, float} nofpclass(qnan) %a0, {float, float} nofpclass(qnan) %a1) { -; CHECK-LABEL: mQ: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fminnm s2, s2, s2 -; CHECK-NEXT: fminnm s0, s0, s0 -; CHECK-NEXT: fminnm s3, s3, s3 -; CHECK-NEXT: fminnm s1, s1, s1 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: fmaxnm s1, s1, s3 -; CHECK-NEXT: ret -entry: - %a0f0 = extractvalue {float, float} %a0, 0 - %a0f1 = extractvalue {float, float} %a0, 1 - %a1f0 = extractvalue {float, float} %a1, 0 - %a1f1 = extractvalue {float, float} %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue {float, float} poison, float %max0, 0 - %ret1 = insertvalue {float, float} %ret0, float %max1, 1 - ret {float, float} %ret1 -} - -define [2 x float] @mAQ([2 x float] nofpclass(qnan) %a0, [2 x float] nofpclass(qnan) %a1) { -; CHECK-LABEL: mAQ: -; 
CHECK: // %bb.0: // %entry -; CHECK-NEXT: fminnm s2, s2, s2 -; CHECK-NEXT: fminnm s0, s0, s0 -; CHECK-NEXT: fminnm s3, s3, s3 -; CHECK-NEXT: fminnm s1, s1, s1 -; CHECK-NEXT: fmaxnm s0, s0, s2 -; CHECK-NEXT: fmaxnm s1, s1, s3 -; CHECK-NEXT: ret -entry: - %a0f0 = extractvalue [2 x float] %a0, 0 - %a0f1 = extractvalue [2 x float] %a0, 1 - %a1f0 = extractvalue [2 x float] %a1, 0 - %a1f1 = extractvalue [2 x float] %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue [2 x float] poison, float %max0, 0 - %ret1 = insertvalue [2 x float] %ret0, float %max1, 1 - ret [2 x float] %ret1 -} diff --git a/llvm/test/CodeGen/AArch64/reserveXreg.ll b/llvm/test/CodeGen/AArch64/reserveXreg.ll index 4a02675ec04fa..037ccab1525d1 100644 --- a/llvm/test/CodeGen/AArch64/reserveXreg.ll +++ b/llvm/test/CodeGen/AArch64/reserveXreg.ll @@ -1,9 +1,8 @@ ;; Check if manually reserved registers are always excluded from being saved by ;; the function prolog/epilog, even for callee-saved ones, as per GCC behavior. ;; Look at AArch64Features.td for registers excluded from this test. -;; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. 
-; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs=0 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s define preserve_mostcc void @t1() "target-features"="+reserve-x1" { ; CHECK-LABEL: t1: diff --git a/llvm/test/CodeGen/AArch64/shuffle-extend.ll b/llvm/test/CodeGen/AArch64/shuffle-extend.ll index 1e8d053973eb2..7658e5ab6936b 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-extend.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-extend.ll @@ -4,7 +4,7 @@ define <2 x i8> @test_v16i8_v2i32_824(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v2i32_824: ; CHECK: // %bb.0: -; CHECK-NEXT: mov b0, v0.b[8] +; CHECK-NEXT: mov v0.b[0], v0.b[8] ; CHECK-NEXT: mov v0.b[4], v1.b[8] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret @@ -16,7 +16,7 @@ define <2 x i8> @test_v16i8_v2i32_824(<16 x i8> %a, <16 x i8> %b) { define <2 x i8> @test_v16i8_v2i32_016(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v2i32_016: ; CHECK: // %bb.0: -; CHECK-NEXT: mov b0, v0.b[0] +; CHECK-NEXT: mov v0.b[0], v0.b[0] ; CHECK-NEXT: mov v0.b[4], v1.b[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret @@ -30,7 +30,7 @@ define <2 x i8> @test_v8i8_v2i32_08(<8 x i8> %a, <8 x i8> %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mov b0, v0.b[0] +; CHECK-NEXT: mov v0.b[0], v0.b[0] ; CHECK-NEXT: mov v0.b[4], v1.b[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret @@ -42,7 +42,7 @@ define <2 x i8> @test_v8i8_v2i32_08(<8 x i8> %a, <8 x i8> %b) { define <2 x i16> @test_v8i16_v2i32_08(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_v8i16_v2i32_08: ; CHECK: // %bb.0: -; CHECK-NEXT: mov h0, v0.h[0] +; CHECK-NEXT: mov v0.h[0], v0.h[0] ; CHECK-NEXT: mov v0.h[2], v1.h[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret @@ -56,7 +56,7 @@ define <2 x i16> @test_v4i16_v2i32_04(<4 x i16> %a, <4 x i16> %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: // 
kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: mov h0, v0.h[0] +; CHECK-NEXT: mov v0.h[0], v0.h[0] ; CHECK-NEXT: mov v0.h[2], v1.h[0] ; CHECK-NEXT: add v0.2s, v0.2s, v0.2s ; CHECK-NEXT: ret @@ -69,7 +69,7 @@ define <2 x i16> @test_v4i16_v2i32_04(<4 x i16> %a, <4 x i16> %b) { define <4 x i8> @test_v16i8_v4i16_824(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v4i16_824: ; CHECK: // %bb.0: -; CHECK-NEXT: mov b2, v0.b[8] +; CHECK-NEXT: mov v2.b[0], v0.b[8] ; CHECK-NEXT: mov v2.b[2], v1.b[8] ; CHECK-NEXT: mov v2.b[4], v0.b[0] ; CHECK-NEXT: mov v2.b[6], v1.b[0] @@ -83,7 +83,7 @@ define <4 x i8> @test_v16i8_v4i16_824(<16 x i8> %a, <16 x i8> %b) { define <4 x i8> @test_v16i8_v4i16_016(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_v16i8_v4i16_016: ; CHECK: // %bb.0: -; CHECK-NEXT: mov b2, v0.b[0] +; CHECK-NEXT: mov v2.b[0], v0.b[0] ; CHECK-NEXT: mov v2.b[2], v1.b[0] ; CHECK-NEXT: mov v2.b[4], v0.b[4] ; CHECK-NEXT: mov v2.b[6], v1.b[4] @@ -98,7 +98,7 @@ define <4 x i8> @test_v8i8_v4i16_08(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: test_v8i8_v4i16_08: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov b2, v0.b[0] +; CHECK-NEXT: mov v2.b[0], v0.b[0] ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: mov v2.b[2], v1.b[0] ; CHECK-NEXT: mov v2.b[4], v0.b[4] @@ -200,8 +200,8 @@ define i1 @test2(ptr %add.ptr, ptr %result, <2 x i64> %hi, <2 x i64> %lo) { ; CHECK-NEXT: dup v2.2d, x9 ; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: and v3.16b, v4.16b, v3.16b -; CHECK-NEXT: mov b5, v0.b[8] -; CHECK-NEXT: mov b0, v0.b[0] +; CHECK-NEXT: mov v5.b[0], v0.b[8] +; CHECK-NEXT: mov v0.b[0], v0.b[0] ; CHECK-NEXT: mov v5.b[4], v3.b[8] ; CHECK-NEXT: mov v0.b[4], v3.b[0] ; CHECK-NEXT: add v3.2s, v5.2s, v5.2s diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll index 130a316bcc2ba..6ea2267cd22e6 100644 --- 
a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll +++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll @@ -2,12 +2,11 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve,+sme2 < %s | FileCheck %s declare void @callee() -declare void @callee_sm() "aarch64_pstate_sm_enabled" declare void @callee_farg(float) declare float @callee_farg_fret(float) ; normal caller -> streaming callees -define void @test0(ptr %callee) nounwind { +define void @test0() nounwind { ; CHECK-LABEL: test0: ; CHECK: // %bb.0: ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill @@ -17,8 +16,8 @@ define void @test0(ptr %callee) nounwind { ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: bl callee_sm -; CHECK-NEXT: bl callee_sm +; CHECK-NEXT: bl callee +; CHECK-NEXT: bl callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload @@ -26,8 +25,8 @@ define void @test0(ptr %callee) nounwind { ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret - call void @callee_sm() - call void @callee_sm() + call void @callee() "aarch64_pstate_sm_enabled" + call void @callee() "aarch64_pstate_sm_enabled" ret void } @@ -119,7 +118,7 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: bl callee_sm +; CHECK-NEXT: bl callee ; CHECK-NEXT: tbnz w19, #0, .LBB3_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstop sm @@ -141,7 +140,7 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-NEXT: // %bb.9: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB3_10: -; CHECK-NEXT: bl callee_sm +; CHECK-NEXT: bl callee ; CHECK-NEXT: tbnz w19, #0, .LBB3_12 ; CHECK-NEXT: // %bb.11: ; CHECK-NEXT: 
smstop sm @@ -153,9 +152,9 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret - call void @callee_sm() + call void @callee() "aarch64_pstate_sm_enabled" call void @callee() - call void @callee_sm() + call void @callee() "aarch64_pstate_sm_enabled" ret void } @@ -343,7 +342,7 @@ define void @test10() "aarch64_pstate_sm_body" { ; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .cfi_restore vg -; CHECK-NEXT: bl callee_sm +; CHECK-NEXT: bl callee ; CHECK-NEXT: .cfi_offset vg, -24 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: bl callee @@ -364,7 +363,7 @@ define void @test10() "aarch64_pstate_sm_body" { ; CHECK-NEXT: .cfi_restore b15 ; CHECK-NEXT: ret call void @callee() - call void @callee_sm() + call void @callee() "aarch64_pstate_sm_enabled" call void @callee() ret void } diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll index 0853325e449af..17d689d2c9eb5 100644 --- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll +++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll @@ -1098,11 +1098,11 @@ define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind { ; NO-SVE-CHECK-NEXT: ret %some_alloc = alloca i64, align 8 %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd() - call void @bar(i64 %rdsvl, i64 %x0) + call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled" ret void } -declare void @bar(i64, i64) "aarch64_pstate_sm_enabled" +declare void @bar(i64, i64) ; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables ; if the function contains a streaming-mode change. 
diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index 63577e4d217a8..7361e850d713e 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s +declare void @callee(); + ; ; Private-ZA Callee ; ; Expect spill & fill of ZT0 around call ; Expect smstop/smstart za around call -define void @zt0_in_caller_no_state_callee(ptr %callee) "aarch64_in_zt0" nounwind { +define void @zt0_in_caller_no_state_callee() "aarch64_in_zt0" nounwind { ; CHECK-LABEL: zt0_in_caller_no_state_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 @@ -15,20 +17,20 @@ define void @zt0_in_caller_no_state_callee(ptr %callee) "aarch64_in_zt0" nounwin ; CHECK-NEXT: mov x19, sp ; CHECK-NEXT: str zt0, [x19] ; CHECK-NEXT: smstop za -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart za ; CHECK-NEXT: ldr zt0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret - call void %callee(); + call void @callee(); ret void; } ; Expect spill & fill of ZT0 around call ; Expect setup and restore lazy-save around call ; Expect smstart za after call -define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind { +define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind { ; CHECK-LABEL: za_zt0_shared_caller_no_state_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill @@ -47,7 +49,7 @@ define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za ; CHECK-NEXT: sturh w8, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x9 ; CHECK-NEXT: str zt0, [x19] -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart za ; CHECK-NEXT: ldr zt0, [x19] ; CHECK-NEXT: mrs x8, TPIDR2_EL0 @@ -61,7 +63,7 @@ define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za ; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret - call void %callee(); + call void @callee(); ret void; } @@ -70,43 +72,43 @@ define void @za_zt0_shared_caller_no_state_callee(ptr %callee) "aarch64_inout_za ; ; Caller and callee have shared ZT0 state, no spill/fill of ZT0 required -define void @zt0_shared_caller_zt0_shared_callee(ptr %callee) "aarch64_in_zt0" nounwind { +define void @zt0_shared_caller_zt0_shared_callee() "aarch64_in_zt0" nounwind { ; CHECK-LABEL: zt0_shared_caller_zt0_shared_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - call void %callee() "aarch64_in_zt0"; + call void @callee() "aarch64_in_zt0"; ret void; } ; Expect spill & fill of ZT0 around call -define void @za_zt0_shared_caller_za_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind { +define void @za_zt0_shared_caller_za_shared_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind { ; CHECK-LABEL: za_zt0_shared_caller_za_shared_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, sp ; CHECK-NEXT: str zt0, [x19] -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: ldr zt0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret - call void %callee() "aarch64_inout_za"; + call void @callee() "aarch64_inout_za"; ret void; } ; Caller and callee have shared ZA & ZT0 -define void @za_zt0_shared_caller_za_zt0_shared_callee(ptr %callee) "aarch64_inout_za" "aarch64_in_zt0" nounwind { +define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aarch64_in_zt0" nounwind { ; CHECK-LABEL: za_zt0_shared_caller_za_zt0_shared_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - call void %callee() "aarch64_inout_za" "aarch64_in_zt0"; + call void @callee() "aarch64_inout_za" "aarch64_in_zt0"; ret void; } @@ -114,7 +116,7 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee(ptr %callee) "aarch64_ino ; Expect spill & fill of ZT0 around call ; Expect smstop/smstart za around call -define void @zt0_in_caller_zt0_new_callee(ptr %callee) "aarch64_in_zt0" nounwind { +define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind { ; CHECK-LABEL: zt0_in_caller_zt0_new_callee: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #80 @@ -122,13 +124,13 @@ define void @zt0_in_caller_zt0_new_callee(ptr %callee) "aarch64_in_zt0" nounwind ; CHECK-NEXT: mov x19, sp ; CHECK-NEXT: str zt0, [x19] ; CHECK-NEXT: smstop za -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart za ; CHECK-NEXT: ldr zt0, [x19] ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret - call void %callee() "aarch64_new_zt0"; + call void @callee() "aarch64_new_zt0"; ret void; } @@ -138,7 +140,7 @@ define void @zt0_in_caller_zt0_new_callee(ptr %callee) "aarch64_in_zt0" nounwind ; Expect smstart ZA & clear ZT0 ; Expect spill & fill of ZT0 around call ; Before return, expect smstop ZA -define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwind { +define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind { ; CHECK-LABEL: zt0_new_caller_zt0_new_callee: ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: sub sp, sp, #80 @@ -154,14 +156,14 @@ define void @zt0_new_caller_zt0_new_callee(ptr %callee) "aarch64_new_zt0" nounwi ; CHECK-NEXT: mov x19, sp ; CHECK-NEXT: str zt0, [x19] ; CHECK-NEXT: smstop za -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: smstart za ; CHECK-NEXT: ldr zt0, [x19] ; CHECK-NEXT: smstop za ; CHECK-NEXT: ldp 
x30, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret - call void %callee() "aarch64_new_zt0"; + call void @callee() "aarch64_new_zt0"; ret void; } @@ -205,7 +207,7 @@ declare {i64, i64} @__arm_sme_state() ; Expect commit of lazy-save if ZA is dormant ; Expect smstart ZA & clear ZT0 ; Before return, expect smstop ZA -define void @zt0_new_caller(ptr %callee) "aarch64_new_zt0" nounwind { +define void @zt0_new_caller() "aarch64_new_zt0" nounwind { ; CHECK-LABEL: zt0_new_caller: ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -217,18 +219,18 @@ define void @zt0_new_caller(ptr %callee) "aarch64_new_zt0" nounwind { ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero { zt0 } -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: smstop za ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - call void %callee() "aarch64_in_zt0"; + call void @callee() "aarch64_in_zt0"; ret void; } ; Expect commit of lazy-save if ZA is dormant ; Expect smstart ZA, clear ZA & clear ZT0 ; Before return, expect smstop ZA -define void @new_za_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_new_zt0" nounwind { +define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind { ; CHECK-LABEL: new_za_zt0_caller: ; CHECK: // %bb.0: // %prelude ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -241,36 +243,36 @@ define void @new_za_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_new_zt0" n ; CHECK-NEXT: smstart za ; CHECK-NEXT: zero {za} ; CHECK-NEXT: zero { zt0 } -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: smstop za ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - call void %callee() "aarch64_inout_za" "aarch64_in_zt0"; + call void @callee() "aarch64_inout_za" "aarch64_in_zt0"; ret void; } ; Expect clear ZA on entry -define void @new_za_shared_zt0_caller(ptr %callee) "aarch64_new_za" "aarch64_in_zt0" nounwind { +define void @new_za_shared_zt0_caller() "aarch64_new_za" "aarch64_in_zt0" nounwind { ; CHECK-LABEL: new_za_shared_zt0_caller: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: zero {za} -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - call void %callee() "aarch64_inout_za" "aarch64_in_zt0"; + call void @callee() "aarch64_inout_za" "aarch64_in_zt0"; ret void; } ; Expect clear ZT0 on entry -define void @shared_za_new_zt0(ptr %callee) "aarch64_inout_za" "aarch64_new_zt0" nounwind { +define void @shared_za_new_zt0() "aarch64_inout_za" "aarch64_new_zt0" nounwind { ; CHECK-LABEL: shared_za_new_zt0: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: zero { zt0 } -; CHECK-NEXT: blr x0 +; CHECK-NEXT: bl callee ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret - call void %callee() "aarch64_inout_za" "aarch64_in_zt0"; + call void @callee() "aarch64_inout_za" "aarch64_in_zt0"; ret void; } diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-prologue.ll b/llvm/test/CodeGen/AArch64/stack-tagging-prologue.ll index 132caef3763ec..3eed6d45c7710 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-prologue.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-prologue.ll @@ -23,7 +23,7 @@ entry: ; INSTR-LABEL: define void @OneVar( ; INSTR: [[BASE:%.*]] = call ptr @llvm.aarch64.irg.sp(i64 0) -; INSTR: [[TLS:%.*]] = call ptr @llvm.thread.pointer.p0() +; INSTR: [[TLS:%.*]] = call ptr @llvm.thread.pointer() ; INSTR: [[TLS_SLOT:%.*]] = getelementptr i8, ptr [[TLS]], i32 -24 ; INSTR: [[TLS_VALUE:%.*]] = load i64, ptr [[TLS_SLOT]], align 8 ; INSTR: [[FP:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll index c3b7161feefb5..d31659c30f21d 100644 --- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll @@ -243,10 +243,10 @@ define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) { ; CHECK-LABEL: sitofp_v8i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov b1, v0.b[0] -; CHECK-NEXT: mov b2, v0.b[2] -; CHECK-NEXT: mov b3, v0.b[4] -; CHECK-NEXT: mov b4, v0.b[6] +; CHECK-NEXT: mov v1.b[0], v0.b[0] +; CHECK-NEXT: mov v2.b[0], v0.b[2] +; CHECK-NEXT: mov v3.b[0], v0.b[4] +; CHECK-NEXT: mov v4.b[0], v0.b[6] ; CHECK-NEXT: mov v1.b[4], v0.b[1] ; CHECK-NEXT: mov v2.b[4], v0.b[3] ; CHECK-NEXT: mov v3.b[4], v0.b[5] @@ -276,14 +276,14 @@ define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) { ; CHECK-LABEL: sitofp_v16i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov b2, 
v0.b[0] -; CHECK-NEXT: mov b3, v0.b[2] -; CHECK-NEXT: mov b4, v0.b[4] -; CHECK-NEXT: mov b5, v0.b[6] -; CHECK-NEXT: mov b6, v1.b[0] -; CHECK-NEXT: mov b7, v1.b[2] -; CHECK-NEXT: mov b16, v1.b[4] -; CHECK-NEXT: mov b17, v1.b[6] +; CHECK-NEXT: mov v2.b[0], v0.b[0] +; CHECK-NEXT: mov v3.b[0], v0.b[2] +; CHECK-NEXT: mov v4.b[0], v0.b[4] +; CHECK-NEXT: mov v5.b[0], v0.b[6] +; CHECK-NEXT: mov v6.b[0], v1.b[0] +; CHECK-NEXT: mov v7.b[0], v1.b[2] +; CHECK-NEXT: mov v16.b[0], v1.b[4] +; CHECK-NEXT: mov v17.b[0], v1.b[6] ; CHECK-NEXT: mov v2.b[4], v0.b[1] ; CHECK-NEXT: mov v3.b[4], v0.b[3] ; CHECK-NEXT: mov v4.b[4], v0.b[5] @@ -396,10 +396,10 @@ define <8 x double> @uitofp_v8i8_double(<8 x i8> %a) { ; CHECK-LABEL: uitofp_v8i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov b2, v0.b[0] -; CHECK-NEXT: mov b3, v0.b[2] -; CHECK-NEXT: mov b4, v0.b[4] -; CHECK-NEXT: mov b5, v0.b[6] +; CHECK-NEXT: mov v2.b[0], v0.b[0] +; CHECK-NEXT: mov v3.b[0], v0.b[2] +; CHECK-NEXT: mov v4.b[0], v0.b[4] +; CHECK-NEXT: mov v5.b[0], v0.b[6] ; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: mov v2.b[4], v0.b[1] ; CHECK-NEXT: mov v3.b[4], v0.b[3] @@ -426,15 +426,15 @@ define <16 x double> @uitofp_v16i8_double(<16 x i8> %a) { ; CHECK-LABEL: uitofp_v16i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov b3, v0.b[0] -; CHECK-NEXT: mov b4, v0.b[2] -; CHECK-NEXT: mov b5, v0.b[4] -; CHECK-NEXT: mov b6, v0.b[6] +; CHECK-NEXT: mov v3.b[0], v0.b[0] +; CHECK-NEXT: mov v4.b[0], v0.b[2] +; CHECK-NEXT: mov v5.b[0], v0.b[4] +; CHECK-NEXT: mov v6.b[0], v0.b[6] ; CHECK-NEXT: movi d1, #0x0000ff000000ff -; CHECK-NEXT: mov b7, v2.b[0] -; CHECK-NEXT: mov b16, v2.b[2] -; CHECK-NEXT: mov b17, v2.b[4] -; CHECK-NEXT: mov b18, v2.b[6] +; CHECK-NEXT: mov v7.b[0], v2.b[0] +; CHECK-NEXT: mov v16.b[0], v2.b[2] +; CHECK-NEXT: mov v17.b[0], v2.b[4] +; CHECK-NEXT: mov v18.b[0], v2.b[6] ; CHECK-NEXT: mov v3.b[4], v0.b[1] ; CHECK-NEXT: mov 
v4.b[4], v0.b[3] ; CHECK-NEXT: mov v5.b[4], v0.b[5] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index 75913d5219af2..aae999ec0a99a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -9,46 +9,46 @@ define amdgpu_kernel void @call_debug_loc() { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !8 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !8 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !8 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !8 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !8 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !8 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, debug-location !8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2, debug-location !7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1, debug-location !7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0, debug-location !7 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16, debug-location !7 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15, debug-location !7 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14, debug-location !7 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11, debug-location !7 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7, debug-location !7 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5, 
debug-location !7 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !8 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !8 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !8 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !8 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !8 - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !8 - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !8 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !8 - ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !8 - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !8 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !8 - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, debug-location !8 - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !8 - ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !8 - ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !8 - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !8 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !8 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !8 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !8 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !8 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !8 - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !8 - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], 
debug-location !8 - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !8 - ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !8 - ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !8 - ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !8 - ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !8 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !8 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !8 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, debug-location !7 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[COPY8]], debug-location !7 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[COPY7]], debug-location !7 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[COPY6]], debug-location !7 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[COPY5]], debug-location !7 + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[COPY4]], debug-location !7 + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[COPY3]], debug-location !7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF debug-location !7 + ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 10, debug-location !7 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], debug-location !7 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY16]], [[COPY1]], implicit $exec, debug-location !7 + ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 20, 
debug-location !7 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]], debug-location !7 + ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY17]], [[COPY]], implicit $exec, debug-location !7 + ; CHECK-NEXT: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 [[COPY2]], [[V_LSHLREV_B32_e64_]], [[V_LSHLREV_B32_e64_1]], implicit $exec, debug-location !7 + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3, debug-location !7 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]], debug-location !7 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]], debug-location !7 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]], debug-location !7 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]], debug-location !7 + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]], debug-location !7 + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]], debug-location !7 + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]], debug-location !7 + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]], debug-location !7 + ; CHECK-NEXT: $sgpr15 = COPY [[DEF]], debug-location !7 + ; CHECK-NEXT: $vgpr31 = COPY [[V_OR3_B32_e64_]], debug-location !7 + ; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee, target-flags(amdgpu-gotprel32-hi) @callee, implicit-def $scc, debug-location !7 + ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[SI_PC_ADD_REL_OFFSET]], 0, 0, debug-location !7 :: (dereferenceable invariant load (p0) from got, addrspace 4) + ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent SI_CALL [[S_LOAD_DWORDX2_IMM]], @callee, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, debug-location !7 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc, debug-location !7 ; CHECK-NEXT: S_ENDPGM 0 entry: call void 
@callee(), !dbg !6 @@ -60,11 +60,11 @@ define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) { ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !8 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31, debug-location !7 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !8 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]], debug-location !7 ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN entry: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 3da3355e51cf9..96c9f40e317ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -7,7 +7,7 @@ define amdgpu_kernel void @asm_convergent() convergent{ ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !2 + ; CHECK-NEXT: INLINEASM &s_barrier, 33 /* sideeffect isconvergent attdialect */, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_barrier", ""() convergent, !srcloc !0 ret void @@ -19,8 +19,8 @@ define amdgpu_kernel void @asm_simple_memory_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !2 - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect 
attdialect */, !2 + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !1 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "", "~{memory}"(), !srcloc !0 call void asm sideeffect "", ""(), !srcloc !0 @@ -33,7 +33,7 @@ define amdgpu_kernel void @asm_simple_vgpr_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !2 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 v0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"(), !srcloc !0 ret void @@ -45,7 +45,7 @@ define amdgpu_kernel void @asm_simple_sgpr_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !2 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 s0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $sgpr0, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "s_mov_b32 s0, 7", "~{s0}"(), !srcloc !0 ret void @@ -57,7 +57,7 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { ; CHECK-NEXT: liveins: $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !2 + ; CHECK-NEXT: INLINEASM &"; def a0", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $agpr0, !1 ; CHECK-NEXT: S_ENDPGM 0 call void asm sideeffect "; def a0", "~{a0}"(), !srcloc !0 ret void @@ -66,7 +66,7 @@ define amdgpu_kernel void 
@asm_simple_agpr_clobber() { define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !2 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %8, 2228235 /* regdef-ec:VGPR_32 */, def early-clobber %9, !1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %9 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll index 296eeaed0a287..101bb6c0ed123 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -5,7 +5,8 @@ define i32 @reloc_constant() { ; CHECK-LABEL: name: reloc_constant ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !1 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 + ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[INT]], [[INT1]] ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll index f1800dc6afcb9..b6b4301dadc7a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-legalize-range-metadata.ll @@ -77,7 +77,7 @@ 
define <2 x i64> @global_load_v2i64_align16__rangemd(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !3, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[MV]](p1) :: (load (<2 x s64>) from %ir.ptr, !range !2, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -119,7 +119,7 @@ define i32 @global_sextload_i8_align1__rangemd(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !1, addrspace 1) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !0, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %load = load i8, ptr addrspace(1) %ptr, align 1, !range !0, !noundef !1 @@ -135,7 +135,7 @@ define i32 @global_zextload_i8_align1__rangemd(ptr addrspace(1) %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !5, addrspace 1) + ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p1) :: (load (s8) from %ir.ptr, !range !4, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) ; CHECK-NEXT: 
SI_RETURN implicit $vgpr0 %load = load i8, ptr addrspace(1) %ptr, align 1, !range !4, !noundef !1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll index 80243d658ae00..44b12a9f6fe81 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mmra.ll @@ -12,11 +12,11 @@ define void @fence_loads(ptr %ptr) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !1 - ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr, align 4) + ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0 + ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4) ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (store release (s8) into %ir.ptr, align 4) + ; CHECK-NEXT: FLAT_STORE_BYTE [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4) ; CHECK-NEXT: SI_RETURN fence release, !mmra !0 %ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2 diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll index b2ebf2e33e29f..92d0a05f35732 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-and-module-lds.ll @@ -23,10 +23,10 @@ ;. 
define amdgpu_kernel void @k0() #0 { ; CHECK-LABEL: @k0() #0 -; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !2, !noalias !5 -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !9, !noalias !10 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !13, !noalias !14 +; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13 ; CHECK-NEXT: ret void store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1 @@ -41,9 +41,9 @@ define amdgpu_kernel void @k0() #0 { define amdgpu_kernel void @k1() #0 { ; CHECK-LABEL: @k1() #1 -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !15, !noalias !18 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) 
@llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !21, !noalias !22 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !23, !noalias !24 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23 ; CHECK-NEXT: ret void ; store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2 @@ -83,8 +83,8 @@ define amdgpu_kernel void @calls_f0() { define void @f0() { ; CHECK-LABEL: define void @f0() -; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !25 -; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !30 +; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.module.lds.t, ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 8, !noalias !24 +; CHECK-NEXT: store i8 8, ptr addrspace(3) @llvm.amdgcn.module.lds, align 8, !noalias !29 ; CHECK-NEXT: ret void store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1 diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll index 37ae05bfab86f..70142fa4b5b29 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds.ll @@ -16,10 +16,10 @@ ;. 
define amdgpu_kernel void @k0() { ; CHECK-LABEL: @k0( -; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !2, !noalias !5 -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !9, !noalias !10 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !11, !noalias !12 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !13, !noalias !14 +; CHECK-NEXT: store i8 1, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 3), align 2, !alias.scope !1, !noalias !4 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 2), align 4, !alias.scope !8, !noalias !9 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, i32 0, i32 1), align 16, !alias.scope !10, !noalias !11 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k0.lds, align 16, !alias.scope !12, !noalias !13 ; CHECK-NEXT: ret void store i8 1, ptr addrspace(3) @lds.size.1.align.1, align 1 @@ -34,9 +34,9 @@ define amdgpu_kernel void @k0() { define amdgpu_kernel void @k1() { ; CHECK-LABEL: @k1( -; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !15, !noalias !18 -; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, 
i32 1), align 16, !alias.scope !21, !noalias !22 -; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !23, !noalias !24 +; CHECK-NEXT: store i8 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 2), align 4, !alias.scope !14, !noalias !17 +; CHECK-NEXT: store i8 4, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, i32 0, i32 1), align 16, !alias.scope !20, !noalias !21 +; CHECK-NEXT: store i8 16, ptr addrspace(3) @llvm.amdgcn.kernel.k1.lds, align 16, !alias.scope !22, !noalias !23 ; CHECK-NEXT: ret void ; store i8 2, ptr addrspace(3) @lds.size.2.align.2, align 2 diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll index dbab9e520f989..4ab05c2923fdb 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll @@ -34,20 +34,19 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) nocapture %ptr.coerce ; GCN-NEXT: s_endpgm ; CHECK-LABEL: define protected amdgpu_kernel void @test( ; CHECK-SAME: ptr addrspace(1) captures(none) [[PTR_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope [[META2:![0-9]+]], !noalias [[META5:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope [[META7:![0-9]+]], !noalias [[META8:![0-9]+]] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds 
([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope [[META5]], !noalias [[META2]] +; CHECK-NEXT: entry: +; CHECK-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4 +; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1 ; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 3 -; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope [[META2]], !noalias [[META5]] -; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope [[META7]], !noalias [[META8]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope [[META5]], !noalias [[META2]] +; CHECK-NEXT: store i8 2, ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, align 4, !alias.scope !1, !noalias !4 +; CHECK-NEXT: tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), ptr addrspace(3) noundef 
align 1 dereferenceable(3) @llvm.amdgcn.kernel.test.lds, i64 3, i1 false), !alias.scope !6, !noalias !7 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_TEST_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.test.lds, i32 0, i32 2), align 4, !alias.scope !4, !noalias !1 ; CHECK-NEXT: [[CMP_I_I19:%.*]] = icmp eq i8 [[TMP1]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[CMP_I_I19]], [[CMP_I_I]] ; CHECK-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP2]] to i8 ; CHECK-NEXT: store i8 [[FROMBOOL8]], ptr addrspace(1) [[PTR_COERCE]], align 1 ; CHECK-NEXT: ret void -; entry: store i8 3, ptr addrspace(3) @_f1, align 1 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef align 1 dereferenceable(3) @_f2, ptr addrspace(3) noundef align 1 dereferenceable(3) @_f1, i64 3, i1 false) @@ -64,15 +63,17 @@ entry: } declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #1 + ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="7" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. -; CHECK: [[META2]] = !{[[META3:![0-9]+]]} -; CHECK: [[META3]] = distinct !{[[META3]], [[META4:![0-9]+]]} -; CHECK: [[META4]] = distinct !{[[META4]]} -; CHECK: [[META5]] = !{[[META6:![0-9]+]]} -; CHECK: [[META6]] = distinct !{[[META6]], [[META4]]} -; CHECK: [[META7]] = !{[[META6]], [[META3]]} -; CHECK: [[META8]] = !{} +; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1} +; CHECK: [[META1:![0-9]+]] = !{!2} +; CHECK: [[META2:![0-9]+]] = distinct !{!2, !3} +; CHECK: [[META3:![0-9]+]] = distinct !{!3} +; CHECK: [[META4:![0-9]+]] = !{!5} +; CHECK: [[META5:![0-9]+]] = distinct !{!5, !3} +; CHECK: [[META6:![0-9]+]] = !{!5, !2} +; CHECK: [[META7:![0-9]+]] = !{} ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll index 7437ce347d1a5..154c798a44f93 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll @@ -9,12 +9,12 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrs ; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa( ; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA2:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA2]], !noalias [[META7]] -; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA2]], !noalias [[META12:![0-9]+]] +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !11 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds 
([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA2]], !noalias [[META12]] +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !11 ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]] ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 ; CHECK-NEXT: ret void @@ -42,18 +42,17 @@ bb: !8 = !{!"omnipotent char", !9, i64 0} !9 = !{!"Simple C++ TBAA"} -; CHECK: !0 = !{i32 0, i32 1} -; CHECK: !1 = !{i32 1, !"amdgpu.lowered_lds", i32 1} -; CHECK: !2 = !{!3, !4, i64 0} -; CHECK: !3 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !4, i64 0} -; CHECK: !4 = !{!"int", !5, i64 0} -; CHECK: !5 = !{!"omnipotent char", !6, i64 0} -; CHECK: !6 = !{!"Simple C++ TBAA"} -; CHECK: !7 = !{!8, !10} -; CHECK: !8 = distinct !{!8, !9} -; CHECK: !9 = distinct !{!9} -; CHECK: !10 = distinct !{!10, !11} -; CHECK: !11 = distinct !{!11} -; CHECK: !12 = !{!13, !14} -; CHECK: !13 = distinct !{!13, !9} -; CHECK: !14 = distinct !{!14, !11} +; CHECK:!0 = !{i32 0, i32 1} +; CHECK:!1 = !{!2, !3, i64 0} +; CHECK:!2 = !{!"no_clobber_ds_load_stores_x2_preexisting_aa", !3, i64 0} +; CHECK:!3 = !{!"int", !4, i64 0} +; CHECK:!4 = !{!"omnipotent char", !5, i64 0} +; CHECK:!5 = !{!"Simple C++ TBAA"} +; CHECK:!6 = !{!7, !9} +; CHECK:!7 = distinct !{!7, !8} +; CHECK:!8 = distinct !{!8} +; CHECK:!9 = distinct !{!9, !10} +; CHECK:!10 = distinct !{!10} +; CHECK:!11 = !{!12, !13} +; CHECK:!12 = distinct !{!12, !8} +; CHECK:!13 = distinct !{!13, !10} diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll index 927ef687bc8d9..24c1bfb8d50f0 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll @@ -14,12 
+14,12 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i ; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x2( ; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !2, !noalias !5 +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !2, !noalias !5 -; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !5, !noalias !2 +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !1, !noalias !4 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !5, !noalias !2 +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !4, !noalias !1 ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]] ; CHECK-NEXT: store i32 [[VAL]], ptr 
addrspace(1) [[ARG]], align 4 ; CHECK-NEXT: ret void @@ -58,15 +58,15 @@ define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i ; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x3( ; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: bb: -; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !7, !noalias !10 +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9 ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !7, !noalias !10 -; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !13, !noalias !14 +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !6, !noalias !9 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13 ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !13, !noalias !14 -; CHECK-NEXT: store i32 3, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr 
addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !15, !noalias !16 +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !12, !noalias !13 +; CHECK-NEXT: store i32 3, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15 ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), i32 0, i32 [[I]] -; CHECK-NEXT: [[VAL_C:%.*]] = load i32, ptr addrspace(3) [[GEP_C]], align 4, !alias.scope !15, !noalias !16 +; CHECK-NEXT: [[VAL_C:%.*]] = load i32, ptr addrspace(3) [[GEP_C]], align 4, !alias.scope !14, !noalias !15 ; CHECK-NEXT: [[VAL_1:%.*]] = add i32 [[VAL_A]], [[VAL_B]] ; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_1]], [[VAL_C]] ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 @@ -111,19 +111,18 @@ bb: } ; CHECK: !0 = !{i32 0, i32 1} -; CHECK: !1 = !{i32 1, !"amdgpu.lowered_lds", i32 1} -; CHECK: !2 = !{!3} -; CHECK: !3 = distinct !{!3, !4} -; CHECK: !4 = distinct !{!4} -; CHECK: !5 = !{!6} -; CHECK: !6 = distinct !{!6, !4} -; CHECK: !7 = !{!8} -; CHECK: !8 = distinct !{!8, !9} -; CHECK: !9 = distinct !{!9} -; CHECK: !10 = !{!11, !12} -; CHECK: !11 = distinct !{!11, !9} -; CHECK: !12 = distinct !{!12, !9} -; CHECK: !13 = !{!11} -; CHECK: !14 = !{!8, !12} -; CHECK: !15 = !{!12} -; CHECK: !16 = !{!8, !11} +; CHECK: !1 = !{!2} +; CHECK: !2 = distinct !{!2, !3} +; CHECK: !3 = distinct !{!3} +; CHECK: !4 = !{!5} +; CHECK: !5 = distinct !{!5, !3} +; CHECK: !6 = !{!7} +; CHECK: !7 = distinct !{!7, !8} +; CHECK: !8 = distinct !{!8} +; CHECK: !9 = !{!10, !11} +; CHECK: !10 = distinct !{!10, !8} +; CHECK: !11 = 
distinct !{!11, !8} +; CHECK: !12 = !{!10} +; CHECK: !13 = !{!7, !11} +; CHECK: !14 = !{!11} +; CHECK: !15 = !{!7, !10} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll index 9edaa72fa55bb..4fcad258d4a74 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-all-indirect-accesses.ll @@ -9,8 +9,7 @@ @B = external addrspace(3) global [0 x i32] define amdgpu_kernel void @kernel_0() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_0 -; CHECK-SAME: () #[[ATTR0:[0-9]+]] {{.*}}.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @kernel_0() #0 !llvm.amdgcn.lds.kernel.id !1 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_0.lds) ] ; CHECK-NEXT: call void @call_store_A() ; CHECK-NEXT: ret void @@ -20,8 +19,7 @@ define amdgpu_kernel void @kernel_0() { } define amdgpu_kernel void @kernel_1() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_1 -; CHECK-SAME: () {{.*}}.amdgcn.lds.kernel.id [[META3:![0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @kernel_1() !llvm.amdgcn.lds.kernel.id !2 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ] ; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() ; CHECK-NEXT: ret void @@ -31,8 +29,7 @@ define amdgpu_kernel void @kernel_1() { } define amdgpu_kernel void @kernel_2() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_2 -; CHECK-SAME: () #[[ATTR0]] {{.*}}.amdgcn.lds.kernel.id [[META4:![0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @kernel_2() #0 !llvm.amdgcn.lds.kernel.id !3 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_2.lds) ] ; CHECK-NEXT: call void @store_A() ; CHECK-NEXT: ret void @@ -42,8 +39,7 @@ define amdgpu_kernel void @kernel_2() { } define 
amdgpu_kernel void @kernel_3() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_3 -; CHECK-SAME: () {{.*}}.amdgcn.lds.kernel.id [[META5:![0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @kernel_3() !llvm.amdgcn.lds.kernel.id !4 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ] ; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() ; CHECK-NEXT: ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll index a98e170a68b8a..a553375cb51e0 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-indirect-extern-uses-max-reachable-alignment.ll @@ -13,12 +13,22 @@ @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8 ; CHECK: %llvm.amdgcn.module.lds.t = type { i32 } +; CHECK: @dynamic_kernel_only = external addrspace(3) global [0 x double] +; CHECK: @dynamic_shared8 = external addrspace(3) global [0 x i64], align 8 +; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol !0 +; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata" ; Alignment of these must be the maximum of the alignment of the reachable symbols +; CHECK: @llvm.amdgcn.expect_align1.dynlds = external addrspace(3) global [0 x i8], align 1, !absolute_symbol !0 +; CHECK: @llvm.amdgcn.expect_align2.dynlds = external addrspace(3) global [0 x i8], align 2, !absolute_symbol !0 +; CHECK: @llvm.amdgcn.expect_align4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol !1 +; CHECK: @llvm.amdgcn.expect_align8.dynlds = external addrspace(3) global [0 x i8], align 8, !absolute_symbol !0 ; Align 4 and symbol at 
address [4 5) as module.lds is reachable +; CHECK: @llvm.amdgcn.expect_max_of_2_and_4.dynlds = external addrspace(3) global [0 x i8], align 4, !absolute_symbol !1 ; Builds a lookup table out of the newly created (suffixed .dynlds) variables in kernel.id order +; CHECK: @llvm.amdgcn.dynlds.offset.table = internal addrspace(4) constant [5 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds to i32)] @@ -103,8 +113,7 @@ define void @use_shared8() #0 { ; The kernels are annotated with kernel.id and llvm.donothing use of the corresponding variable define amdgpu_kernel void @expect_align1() { -; CHECK-LABEL: define amdgpu_kernel void @expect_align1( -; CHECK-SAME: ) {{.*}}.amdgcn.lds.kernel.id [[META3:![0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @expect_align1() !llvm.amdgcn.lds.kernel.id !2 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align1.dynlds) ] ; CHECK-NEXT: call void @use_shared1() ; CHECK-NEXT: ret void @@ -114,8 +123,7 @@ define amdgpu_kernel void @expect_align1() { } define amdgpu_kernel void @expect_align2() { -; CHECK-LABEL: define amdgpu_kernel void @expect_align2( -; CHECK-SAME: ) {{.*}}.amdgcn.lds.kernel.id [[META4:![0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @expect_align2() !llvm.amdgcn.lds.kernel.id !3 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align2.dynlds) ] ; CHECK-NEXT: call void @use_shared2() ; CHECK-NEXT: ret void @@ -126,7 +134,7 @@ define amdgpu_kernel void @expect_align2() { define amdgpu_kernel void @expect_align4() { ; CHECK-LABEL: define amdgpu_kernel void @expect_align4( -; CHECK-SAME: ) 
#[[ATTR1:[0-9]+]] {{.*}}.amdgcn.lds.kernel.id [[META5:![0-9]+]] { +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id !4 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align4.dynlds) ] ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] ; CHECK-NEXT: call void @use_shared4() @@ -138,8 +146,7 @@ define amdgpu_kernel void @expect_align4() { ; Use dynamic_shared directly too. define amdgpu_kernel void @expect_align8() { -; CHECK-LABEL: define amdgpu_kernel void @expect_align8( -; CHECK-SAME: ) {{.*}}.amdgcn.lds.kernel.id [[META6:![0-9]+]] { +; CHECK-LABEL: define amdgpu_kernel void @expect_align8() !llvm.amdgcn.lds.kernel.id !5 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_align8.dynlds) ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i64], ptr addrspace(3) @dynamic_shared8, i32 0, i32 9 ; CHECK-NEXT: store i64 3, ptr addrspace(3) [[ARRAYIDX]], align 8 @@ -155,7 +162,7 @@ define amdgpu_kernel void @expect_align8() { ; Note: use_shared4 uses module.lds so this will allocate at offset 4 define amdgpu_kernel void @expect_max_of_2_and_4() { ; CHECK-LABEL: define amdgpu_kernel void @expect_max_of_2_and_4( -; CHECK-SAME: ) #[[ATTR1]] {{.*}}.amdgcn.lds.kernel.id [[META7:![0-9]+]] { +; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id !6 { ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.expect_max_of_2_and_4.dynlds) ] ; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] ; CHECK-NEXT: call void @use_shared2() @@ -171,8 +178,10 @@ define amdgpu_kernel void @expect_max_of_2_and_4() { attributes #0 = { noinline } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +; CHECK: declare void @llvm.donothing() #2 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) 
+; CHECK: declare noundef i32 @llvm.amdgcn.lds.kernel.id() #3 ; CHECK: attributes #0 = { noinline } ; CHECK: attributes #1 = { "amdgpu-lds-size"="4,4" } @@ -181,8 +190,8 @@ attributes #0 = { noinline } ; CHECK: !0 = !{i32 0, i32 1} ; CHECK: !1 = !{i32 4, i32 5} -; CHECK: !3 = !{i32 0} -; CHECK: !4 = !{i32 1} -; CHECK: !5 = !{i32 2} -; CHECK: !6 = !{i32 3} -; CHECK: !7 = !{i32 4} +; CHECK: !2 = !{i32 0} +; CHECK: !3 = !{i32 1} +; CHECK: !4 = !{i32 2} +; CHECK: !5 = !{i32 3} +; CHECK: !6 = !{i32 4} diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll index 4aa92ce85adef..2a7553ae5d92b 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-via-hybrid.ll @@ -195,7 +195,7 @@ define amdgpu_kernel void @k01() { define amdgpu_kernel void @k23() { ; OPT-LABEL: @k23( -; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] +; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] ; OPT-NEXT: call void @f2() ; OPT-NEXT: call void @f3() ; OPT-NEXT: ret void @@ -245,12 +245,12 @@ define amdgpu_kernel void @k23() { ; Access and allocate three variables define amdgpu_kernel void @k123() { ; OPT-LABEL: @k123( -; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META12:![0-9]+]], !noalias [[META15:![0-9]+]] +; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META11:![0-9]+]], !noalias [[META14:![0-9]+]] ; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] ; OPT-NEXT: call void @f1() -; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr 
inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META15]], !noalias [[META12]] +; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META14]], !noalias [[META11]] ; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8 -; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META15]], !noalias [[META12]] +; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 8, !alias.scope [[META14]], !noalias [[META11]] ; OPT-NEXT: call void @f2() ; OPT-NEXT: ret void ; @@ -312,19 +312,18 @@ define amdgpu_kernel void @k123() { ; OPT: !1 = !{i32 4, i32 5} ; OPT: !2 = !{i32 8, i32 9} ; OPT: !3 = !{i32 1, !"amdhsa_code_object_version", i32 500} -; OPT: !4 = !{i32 1, !"amdgpu.lowered_lds", i32 1} -; OPT: !5 = !{i32 1} -; OPT: !6 = !{!7} -; OPT: !7 = distinct !{!7, !8} -; OPT: !8 = distinct !{!8} -; OPT: !9 = !{!10} -; OPT: !10 = distinct !{!10, !8} -; OPT: !11 = !{i32 0} -; OPT: !12 = !{!13} -; OPT: !13 = distinct !{!13, !14} -; OPT: !14 = distinct !{!14} -; OPT: !15 = !{!16} -; OPT: !16 = distinct !{!16, !14} +; OPT: !4 = !{i32 1} +; OPT: !5 = !{!6} +; OPT: !6 = distinct !{!6, !7} +; OPT: !7 = distinct !{!7} +; OPT: !8 = !{!9} +; OPT: !9 = distinct !{!9, !7} +; OPT: !10 = !{i32 0} +; OPT: !11 = !{!12} +; OPT: !12 = distinct !{!12, !13} +; OPT: !13 = distinct !{!13} +; OPT: !14 = !{!15} +; OPT: !15 = distinct !{!15, !13} attributes #0 = { "amdgpu-lds-size"="8" } attributes #1 = { "amdgpu-lds-size"="16" } diff --git a/llvm/test/CodeGen/AMDGPU/mmra.ll b/llvm/test/CodeGen/AMDGPU/mmra.ll index 
3e88b93125101..444997858bf7a 100644 --- a/llvm/test/CodeGen/AMDGPU/mmra.ll +++ b/llvm/test/CodeGen/AMDGPU/mmra.ll @@ -14,12 +14,12 @@ define void @fence_loads(ptr %ptr) { ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2 - ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr, align 4) + ; CHECK-NEXT: ATOMIC_FENCE 5, 1, mmra !0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1 + ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr, align 4) ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !3 - ; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (store release (s8) into %ir.ptr, align 4) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2 + ; CHECK-NEXT: FLAT_STORE_BYTE [[COPY3]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (store release (s8) into %ir.ptr, align 4) ; CHECK-NEXT: SI_RETURN fence release, !mmra !0 %ld = load atomic i8, ptr %ptr acquire, align 4, !mmra !2 @@ -37,8 +37,8 @@ define void @atomicrmw_acq(ptr %ptr) { ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !2 - ; 
CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE killed [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load acquire (s8) from %ir.ptr) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], mmra !1 + ; CHECK-NEXT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE killed [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load acquire (s8) from %ir.ptr) ; CHECK-NEXT: SI_RETURN %old.2 = atomicrmw add ptr %ptr, i8 0 acquire, !mmra !2 ret void @@ -69,8 +69,8 @@ define void @atomicrmw_rel(ptr %ptr) { ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_2]], implicit $exec ; CHECK-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[V_LSHLREV_B32_e64_1]], implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !3 - ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !3 :: (load (s32) from %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !2 + ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load (s32) from %ir.AlignedAddr) ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: @@ -83,9 +83,9 @@ define void @atomicrmw_rel(ptr %ptr) { ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !3 - ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, 
mmra !3 :: (load store release monotonic (s32) on %ir.AlignedAddr) - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !2 + ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !2 :: (load store release monotonic (s32) on %ir.AlignedAddr) + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !2 ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_EQ_U32_e64_]], [[PHI]], implicit-def dead $scc ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2 @@ -125,8 +125,8 @@ define void @cmpxchg(ptr %ptr) { ; CHECK-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 killed [[V_LSHLREV_B32_e64_1]], implicit $exec ; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; CHECK-NEXT: [[V_LSHLREV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_3]], implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !2 - ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !2 :: (load (s32) from %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !1 + ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load (s32) from %ir.AlignedAddr) ; CHECK-NEXT: [[V_AND_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[FLAT_LOAD_DWORD]], [[V_NOT_B32_e32_]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF @@ 
-141,8 +141,8 @@ define void @cmpxchg(ptr %ptr) { ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_OR_B32_e64_]], %subreg.sub0, [[PHI2]], %subreg.sub1 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !2 - ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !2 :: (load store acquire acquire (s32) on %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !1 + ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY4]], killed [[COPY6]], 0, 1, implicit $exec, implicit $flat_scr, mmra !1 :: (load store acquire acquire (s32) on %ir.AlignedAddr) ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI2]], implicit $exec ; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF @@ -248,8 +248,8 @@ define void @atomicrmw_rel_deepcopy(ptr %ptr) { ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 255 ; CHECK-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_2]], implicit $exec ; CHECK-NEXT: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 [[V_LSHLREV_B32_e64_1]], implicit $exec - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !1 - ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY31]], 0, 0, implicit $exec, implicit $flat_scr, mmra !1 :: (load (s32) from %ir.AlignedAddr) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], mmra !0 + ; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY31]], 0, 0, implicit $exec, implicit $flat_scr, mmra !0 :: (load (s32) from %ir.AlignedAddr) ; CHECK-NEXT: 
[[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.atomicrmw.start: @@ -262,9 +262,9 @@ define void @atomicrmw_rel_deepcopy(ptr %ptr) { ; CHECK-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_2]], %subreg.sub0, [[PHI1]], %subreg.sub1 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !1 - ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY30]], killed [[COPY32]], 0, 1, implicit $exec, implicit $flat_scr, mmra !1 :: (load store release monotonic (s32) on %ir.AlignedAddr) - ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !1 + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]], mmra !0 + ; CHECK-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY30]], killed [[COPY32]], 0, 1, implicit $exec, implicit $flat_scr, mmra !0 :: (load store release monotonic (s32) on %ir.AlignedAddr) + ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[FLAT_ATOMIC_CMPSWAP_RTN]], [[PHI1]], implicit $exec, mmra !0 ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_EQ_U32_e64_]], [[PHI]], implicit-def dead $scc ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.2 diff --git a/llvm/test/CodeGen/ARM/nofpclass.ll b/llvm/test/CodeGen/ARM/nofpclass.ll deleted file mode 100644 index aaeb6c11fa598..0000000000000 --- a/llvm/test/CodeGen/ARM/nofpclass.ll +++ /dev/null @@ -1,37 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=armv8-unknown-none-eabi < %s | FileCheck %s --check-prefixes=CHECK,HARD -; RUN: llc -mtriple=armv8-unknown-none-eabi 
-mattr=+soft-float < %s | FileCheck %s --check-prefixes=CHECK,SOFT - -define nofpclass(nan inf) half @f1(half returned nofpclass(nan inf) %x) { -; CHECK-LABEL: f1: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: bx lr -entry: - ret half %x -} - -define noundef half @f2(half nofpclass(nan) %a) { -; HARD-LABEL: f2: -; HARD: @ %bb.0: @ %entry -; HARD-NEXT: vmov.f32 s0, #1.000000e+00 -; HARD-NEXT: vmov s2, r0 -; HARD-NEXT: vcvtb.f32.f16 s2, s2 -; HARD-NEXT: vadd.f32 s0, s2, s0 -; HARD-NEXT: vcvtb.f16.f32 s0, s0 -; HARD-NEXT: vmov r0, s0 -; HARD-NEXT: bx lr -; -; SOFT-LABEL: f2: -; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r11, lr} -; SOFT-NEXT: push {r11, lr} -; SOFT-NEXT: uxth r0, r0 -; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r1, #1065353216 -; SOFT-NEXT: bl __aeabi_fadd -; SOFT-NEXT: bl __aeabi_f2h -; SOFT-NEXT: pop {r11, pc} -entry: - %0 = fadd half %a, 0xH3C00 - ret half %0 -} diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/low-precision.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/low-precision.ll index 5ecac3980d044..561e09bb1e9dc 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/low-precision.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/low-precision.ll @@ -1,10 +1,4 @@ ; RUN: opt -S --passes="print-dx-shader-flags" 2>&1 %s | FileCheck %s -; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s --check-prefix=DXC - -; Check that when the dx.nativelowprec module flag is not specified, the -; module-level shader flag UseNativeLowPrecision is not set, and the -; MinimumPrecision feature flag is set due to the presence of half and i16 -; without native low precision. 
target triple = "dxil-pc-shadermodel6.7-library" @@ -12,33 +6,25 @@ target triple = "dxil-pc-shadermodel6.7-library" ;CHECK-NEXT: ; Shader Flags Value: 0x00000020 ;CHECK-NEXT: ; ;CHECK-NEXT: ; Note: shader requires additional functionality: -;CHECK-NEXT: ; Minimum-precision data types ;CHECK-NEXT: ; Note: extra DXIL module flags: -;CHECK-NEXT: ; Low-precision data types present +;CHECK-NEXT: ; Low-precision data types ;CHECK-NEXT: ; ;CHECK-NEXT: ; Shader Flags for Module Functions ;CHECK-LABEL: ; Function add_i16 : 0x00000020 -define i16 @add_i16(i16 %a, i16 %b) "hlsl.export" { +define i16 @add_i16(i16 %a, i16 %b) { %sum = add i16 %a, %b ret i16 %sum } ;CHECK-LABEL: ; Function add_i32 : 0x00000000 -define i32 @add_i32(i32 %a, i32 %b) "hlsl.export" { +define i32 @add_i32(i32 %a, i32 %b) { %sum = add i32 %a, %b ret i32 %sum } ;CHECK-LABEL: ; Function add_half : 0x00000020 -define half @add_half(half %a, half %b) "hlsl.export" { +define half @add_half(half %a, half %b) { %sum = fadd half %a, %b ret half %sum } - -; DXC: - Name: SFI0 -; DXC-NEXT: Size: 8 -; DXC-NEXT: Flags: -; DXC: MinimumPrecision: true -; DXC: NativeLowPrecision: false -; DXC: ... diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/res-may-not-alias-sm6.7.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/res-may-not-alias-sm6.7.ll index fc6560e321b4b..934319557a11f 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/res-may-not-alias-sm6.7.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/res-may-not-alias-sm6.7.ll @@ -2,8 +2,8 @@ ; This test checks to ensure the behavior of the DXIL shader flag analysis ; for the flag ResMayNotAlias is correct when the DXIL Version is >= 1.7 and the -; DXIL Validator Version < 1.8. The ResMayNotAlias module flag (0x20000000) -; should be set if there are one or more UAVs present globally in the +; DXIL Validator Version < 1.8. The ResMayNotAlias flag (0x20000000) should be +; set on all functions if there are one or more UAVs present globally in the ; module. 
target triple = "dxil-pc-shadermodel6.7-library" @@ -19,7 +19,7 @@ target triple = "dxil-pc-shadermodel6.7-library" ; CHECK: Any UAV may not alias any other UAV ; -; CHECK: Function loadUAV : 0x00000000 +; CHECK: Function loadUAV : 0x200000000 define float @loadUAV() #0 { %res = call target("dx.TypedBuffer", float, 1, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) @@ -29,7 +29,7 @@ define float @loadUAV() #0 { ret float %val } -; CHECK: Function loadSRV : 0x00000010 +; CHECK: Function loadSRV : 0x200000010 define float @loadSRV() #0 { %res = tail call target("dx.RawBuffer", float, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-0.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-0.ll index 2e68fe375a42c..c537a01482f39 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-0.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-0.ll @@ -1,9 +1,7 @@ ; RUN: opt -S --passes="print-dx-shader-flags" 2>&1 %s | FileCheck %s -; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s --check-prefix=DXC ; Check that when the dx.nativelowprec module flag is set to 0, the module-level -; shader flag UseNativeLowPrecision is not set, and the MinimumPrecision feature -; flag is set due to the presence of half and i16 without native low precision. 
+; shader flag UseNativeLowPrecision is not set target triple = "dxil-pc-shadermodel6.7-library" @@ -11,37 +9,29 @@ target triple = "dxil-pc-shadermodel6.7-library" ;CHECK-NEXT: ; Shader Flags Value: 0x00000020 ;CHECK-NEXT: ; ;CHECK-NEXT: ; Note: shader requires additional functionality: -;CHECK-NEXT: ; Minimum-precision data types ;CHECK-NEXT: ; Note: extra DXIL module flags: -;CHECK-NEXT: ; Low-precision data types present -;CHECK-NOT: ; Enable native low-precision data types +;CHECK-NEXT: ; Low-precision data types +;CHECK-NOT: ; Native 16bit types enabled ;CHECK-NEXT: ; ;CHECK-NEXT: ; Shader Flags for Module Functions ;CHECK-LABEL: ; Function add_i16 : 0x00000020 -define i16 @add_i16(i16 %a, i16 %b) "hlsl.export" { +define i16 @add_i16(i16 %a, i16 %b) { %sum = add i16 %a, %b ret i16 %sum } ;CHECK-LABEL: ; Function add_i32 : 0x00000000 -define i32 @add_i32(i32 %a, i32 %b) "hlsl.export" { +define i32 @add_i32(i32 %a, i32 %b) { %sum = add i32 %a, %b ret i32 %sum } ;CHECK-LABEL: ; Function add_half : 0x00000020 -define half @add_half(half %a, half %b) "hlsl.export" { +define half @add_half(half %a, half %b) { %sum = fadd half %a, %b ret half %sum } !llvm.module.flags = !{!0} !0 = !{i32 1, !"dx.nativelowprec", i32 0} - -; DXC: - Name: SFI0 -; DXC-NEXT: Size: 8 -; DXC-NEXT: Flags: -; DXC: MinimumPrecision: true -; DXC: NativeLowPrecision: false -; DXC: ... 
diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-1.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-1.ll index cb3b486cebce5..07c4b9064d666 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-1.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/use-native-low-precision-1.ll @@ -1,9 +1,4 @@ ; RUN: opt -S --passes="print-dx-shader-flags" 2>&1 %s | FileCheck %s -; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s --check-prefix=DXC - -; Check that when the dx.nativelowprec module flag is set to 1, the module-level -; shader flag UseNativeLowPrecision is set, and the NativeLowPrecision feature -; flag is set target triple = "dxil-pc-shadermodel6.7-library" @@ -11,37 +6,32 @@ target triple = "dxil-pc-shadermodel6.7-library" ;CHECK-NEXT: ; Shader Flags Value: 0x00800020 ;CHECK-NEXT: ; ;CHECK-NEXT: ; Note: shader requires additional functionality: -;CHECK-NEXT: ; Native low-precision data types ;CHECK-NEXT: ; Note: extra DXIL module flags: -;CHECK-NEXT: ; Low-precision data types present -;CHECK-NEXT: ; Enable native low-precision data types +;CHECK-NEXT: ; Low-precision data types +;CHECK-NEXT: ; Use native low precision ;CHECK-NEXT: ; ;CHECK-NEXT: ; Shader Flags for Module Functions -;CHECK-LABEL: ; Function add_i16 : 0x00000020 -define i16 @add_i16(i16 %a, i16 %b) "hlsl.export" { +;CHECK-LABEL: ; Function add_i16 : 0x00800020 +define i16 @add_i16(i16 %a, i16 %b) { %sum = add i16 %a, %b ret i16 %sum } -;CHECK-LABEL: ; Function add_i32 : 0x00000000 -define i32 @add_i32(i32 %a, i32 %b) "hlsl.export" { +; NOTE: The flag for native low precision (0x80000) is set for every function +; in the module regardless of whether or not the function uses low precision +; data types (flag 0x20). 
This matches the behavior in DXC +;CHECK-LABEL: ; Function add_i32 : 0x00800000 +define i32 @add_i32(i32 %a, i32 %b) { %sum = add i32 %a, %b ret i32 %sum } -;CHECK-LABEL: ; Function add_half : 0x00000020 -define half @add_half(half %a, half %b) "hlsl.export" { +;CHECK-LABEL: ; Function add_half : 0x00800020 +define half @add_half(half %a, half %b) { %sum = fadd half %a, %b ret half %sum } !llvm.module.flags = !{!0} !0 = !{i32 1, !"dx.nativelowprec", i32 1} - -; DXC: - Name: SFI0 -; DXC-NEXT: Size: 8 -; DXC-NEXT: Flags: -; DXC: MinimumPrecision: false -; DXC: NativeLowPrecision: true -; DXC: ... diff --git a/llvm/test/CodeGen/Mips/nofpclass.ll b/llvm/test/CodeGen/Mips/nofpclass.ll deleted file mode 100644 index b9737fe1175b9..0000000000000 --- a/llvm/test/CodeGen/Mips/nofpclass.ll +++ /dev/null @@ -1,224 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc --mtriple=mipsisa32r6-linux-gnu < %s | FileCheck %s --check-prefix=MIPS32R6 -; RUN: llc --mtriple=mipsisa64r6-linux-gnu < %s | FileCheck %s --check-prefix=MIPS64R6 - -define float @f(float nofpclass(nan) %a, float nofpclass(nan) %b) { -; MIPS32R6-LABEL: f: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f0, $f12, $f14 -; -; MIPS64R6-LABEL: f: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f0, $f12, $f13 -entry: - %cond = tail call float @llvm.maximumnum.f32(float %a, float %b) - ret float %cond -} - -define {float, float} @m({float, float} nofpclass(nan) %a0, {float, float} nofpclass(nan) %a1) { -; MIPS32R6-LABEL: m: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: mtc1 $6, $f0 -; MIPS32R6-NEXT: max.s $f0, $f12, $f0 -; MIPS32R6-NEXT: mtc1 $7, $f1 -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f2, $f14, $f1 -; -; MIPS64R6-LABEL: m: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: max.s $f0, $f12, $f14 -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f2, $f13, $f15 -entry: - 
%a0f0 = extractvalue {float, float} %a0, 0 - %a0f1 = extractvalue {float, float} %a0, 1 - %a1f0 = extractvalue {float, float} %a1, 0 - %a1f1 = extractvalue {float, float} %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue {float, float} poison, float %max0, 0 - %ret1 = insertvalue {float, float} %ret0, float %max1, 1 - ret {float, float} %ret1 -} - -define [2 x float] @mA([2 x float] nofpclass(nan) %a0, [2 x float] nofpclass(nan) %a1) { -; MIPS32R6-LABEL: mA: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: mtc1 $6, $f0 -; MIPS32R6-NEXT: max.s $f0, $f12, $f0 -; MIPS32R6-NEXT: mtc1 $7, $f1 -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f2, $f14, $f1 -; -; MIPS64R6-LABEL: mA: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: max.s $f0, $f12, $f14 -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f2, $f13, $f15 -entry: - %a0f0 = extractvalue [2 x float] %a0, 0 - %a0f1 = extractvalue [2 x float] %a0, 1 - %a1f0 = extractvalue [2 x float] %a1, 0 - %a1f1 = extractvalue [2 x float] %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue [2 x float] poison, float %max0, 0 - %ret1 = insertvalue [2 x float] %ret0, float %max1, 1 - ret [2 x float] %ret1 -} - -define float @fS(float nofpclass(snan) %a, float nofpclass(snan) %b) { -; MIPS32R6-LABEL: fS: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f0, $f12, $f14 -; -; MIPS64R6-LABEL: fS: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f0, $f12, $f13 -entry: - %cond = tail call float @llvm.maximumnum.f32(float %a, float %b) - ret float %cond -} - -define {float, float} @mS({float, float} nofpclass(snan) %a0, {float, float} nofpclass(snan) %a1) { -; MIPS32R6-LABEL: mS: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: mtc1 
$6, $f0 -; MIPS32R6-NEXT: max.s $f0, $f12, $f0 -; MIPS32R6-NEXT: mtc1 $7, $f1 -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f2, $f14, $f1 -; -; MIPS64R6-LABEL: mS: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: max.s $f0, $f12, $f14 -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f2, $f13, $f15 -entry: - %a0f0 = extractvalue {float, float} %a0, 0 - %a0f1 = extractvalue {float, float} %a0, 1 - %a1f0 = extractvalue {float, float} %a1, 0 - %a1f1 = extractvalue {float, float} %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue {float, float} poison, float %max0, 0 - %ret1 = insertvalue {float, float} %ret0, float %max1, 1 - ret {float, float} %ret1 -} - -define [2 x float] @mAS([2 x float] nofpclass(snan) %a0, [2 x float] nofpclass(snan) %a1) { -; MIPS32R6-LABEL: mAS: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: mtc1 $6, $f0 -; MIPS32R6-NEXT: max.s $f0, $f12, $f0 -; MIPS32R6-NEXT: mtc1 $7, $f1 -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f2, $f14, $f1 -; -; MIPS64R6-LABEL: mAS: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: max.s $f0, $f12, $f14 -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f2, $f13, $f15 -entry: - %a0f0 = extractvalue [2 x float] %a0, 0 - %a0f1 = extractvalue [2 x float] %a0, 1 - %a1f0 = extractvalue [2 x float] %a1, 0 - %a1f1 = extractvalue [2 x float] %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue [2 x float] poison, float %max0, 0 - %ret1 = insertvalue [2 x float] %ret0, float %max1, 1 - ret [2 x float] %ret1 -} - -define float @fQ(float nofpclass(qnan) %a, float nofpclass(qnan) %b) { -; MIPS32R6-LABEL: fQ: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: min.s $f0, $f14, $f14 -; MIPS32R6-NEXT: min.s $f1, $f12, $f12 -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f0, $f1, $f0 
-; -; MIPS64R6-LABEL: fQ: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: min.s $f0, $f13, $f13 -; MIPS64R6-NEXT: min.s $f1, $f12, $f12 -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f0, $f1, $f0 -entry: - %cond = tail call float @llvm.maximumnum.f32(float %a, float %b) - ret float %cond -} - -define {float, float} @mQ({float, float} nofpclass(qnan) %a0, {float, float} nofpclass(qnan) %a1) { -; MIPS32R6-LABEL: mQ: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: min.s $f0, $f12, $f12 -; MIPS32R6-NEXT: mtc1 $6, $f1 -; MIPS32R6-NEXT: min.s $f1, $f1, $f1 -; MIPS32R6-NEXT: max.s $f0, $f0, $f1 -; MIPS32R6-NEXT: min.s $f1, $f14, $f14 -; MIPS32R6-NEXT: mtc1 $7, $f2 -; MIPS32R6-NEXT: min.s $f2, $f2, $f2 -; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: max.s $f2, $f1, $f2 -; -; MIPS64R6-LABEL: mQ: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: min.s $f0, $f14, $f14 -; MIPS64R6-NEXT: min.s $f1, $f12, $f12 -; MIPS64R6-NEXT: max.s $f0, $f1, $f0 -; MIPS64R6-NEXT: min.s $f1, $f15, $f15 -; MIPS64R6-NEXT: min.s $f2, $f13, $f13 -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f2, $f2, $f1 -entry: - %a0f0 = extractvalue {float, float} %a0, 0 - %a0f1 = extractvalue {float, float} %a0, 1 - %a1f0 = extractvalue {float, float} %a1, 0 - %a1f1 = extractvalue {float, float} %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue {float, float} poison, float %max0, 0 - %ret1 = insertvalue {float, float} %ret0, float %max1, 1 - ret {float, float} %ret1 -} - -define [2 x float] @mAQ([2 x float] nofpclass(qnan) %a0, [2 x float] nofpclass(qnan) %a1) { -; MIPS32R6-LABEL: mAQ: -; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: min.s $f0, $f12, $f12 -; MIPS32R6-NEXT: mtc1 $6, $f1 -; MIPS32R6-NEXT: min.s $f1, $f1, $f1 -; MIPS32R6-NEXT: max.s $f0, $f0, $f1 -; MIPS32R6-NEXT: min.s $f1, $f14, $f14 -; MIPS32R6-NEXT: mtc1 $7, $f2 -; MIPS32R6-NEXT: min.s $f2, $f2, $f2 -; MIPS32R6-NEXT: jr 
$ra -; MIPS32R6-NEXT: max.s $f2, $f1, $f2 -; -; MIPS64R6-LABEL: mAQ: -; MIPS64R6: # %bb.0: # %entry -; MIPS64R6-NEXT: min.s $f0, $f14, $f14 -; MIPS64R6-NEXT: min.s $f1, $f12, $f12 -; MIPS64R6-NEXT: max.s $f0, $f1, $f0 -; MIPS64R6-NEXT: min.s $f1, $f15, $f15 -; MIPS64R6-NEXT: min.s $f2, $f13, $f13 -; MIPS64R6-NEXT: jr $ra -; MIPS64R6-NEXT: max.s $f2, $f2, $f1 -entry: - %a0f0 = extractvalue [2 x float] %a0, 0 - %a0f1 = extractvalue [2 x float] %a0, 1 - %a1f0 = extractvalue [2 x float] %a1, 0 - %a1f1 = extractvalue [2 x float] %a1, 1 - %max0 = tail call float @llvm.maximumnum.f32(float %a0f0, float %a1f0) - %max1 = tail call float @llvm.maximumnum.f32(float %a0f1, float %a1f1) - %ret0 = insertvalue [2 x float] poison, float %max0, 0 - %ret1 = insertvalue [2 x float] %ret0, float %max1, 1 - ret [2 x float] %ret1 -} diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk-s2g-sm100.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk-s2g-sm100.ll deleted file mode 100644 index 1e6b04635edd5..0000000000000 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk-s2g-sm100.ll +++ /dev/null @@ -1,46 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s -; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86| %ptxas-verify -arch=sm_100 %} -; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_100 %} - -target triple = "nvptx64-nvidia-cuda" - -declare void @llvm.nvvm.cp.async.bulk.shared.cta.to.global.bytemask(ptr addrspace(1), ptr addrspace(3), i32, i64, i1, i16) - -define void @cp_async_bulk_s2g_bytemask(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %size, i64 %ch, i16 %mask) { -; CHECK-PTX64-LABEL: cp_async_bulk_s2g_bytemask( 
-; CHECK-PTX64: { -; CHECK-PTX64-NEXT: .reg .b16 %rs<2>; -; CHECK-PTX64-NEXT: .reg .b32 %r<2>; -; CHECK-PTX64-NEXT: .reg .b64 %rd<4>; -; CHECK-PTX64-EMPTY: -; CHECK-PTX64-NEXT: // %bb.0: -; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [cp_async_bulk_s2g_bytemask_param_0]; -; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [cp_async_bulk_s2g_bytemask_param_1]; -; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_s2g_bytemask_param_2]; -; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_s2g_bytemask_param_3]; -; CHECK-PTX64-NEXT: ld.param.b16 %rs1, [cp_async_bulk_s2g_bytemask_param_4]; -; CHECK-PTX64-NEXT: cp.async.bulk.global.shared::cta.bulk_group.L2::cache_hint.cp_mask [%rd1], [%rd2], %r1, %rd3, %rs1; -; CHECK-PTX64-NEXT: cp.async.bulk.global.shared::cta.bulk_group.cp_mask [%rd1], [%rd2], %r1, %rs1; -; CHECK-PTX64-NEXT: ret; -; -; CHECK-PTX-SHARED32-LABEL: cp_async_bulk_s2g_bytemask( -; CHECK-PTX-SHARED32: { -; CHECK-PTX-SHARED32-NEXT: .reg .b16 %rs<2>; -; CHECK-PTX-SHARED32-NEXT: .reg .b32 %r<3>; -; CHECK-PTX-SHARED32-NEXT: .reg .b64 %rd<3>; -; CHECK-PTX-SHARED32-EMPTY: -; CHECK-PTX-SHARED32-NEXT: // %bb.0: -; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_s2g_bytemask_param_0]; -; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [cp_async_bulk_s2g_bytemask_param_1]; -; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_s2g_bytemask_param_2]; -; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_s2g_bytemask_param_3]; -; CHECK-PTX-SHARED32-NEXT: ld.param.b16 %rs1, [cp_async_bulk_s2g_bytemask_param_4]; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.global.shared::cta.bulk_group.L2::cache_hint.cp_mask [%rd1], [%r1], %r2, %rd2, %rs1; -; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.global.shared::cta.bulk_group.cp_mask [%rd1], [%r1], %r2, %rs1; -; CHECK-PTX-SHARED32-NEXT: ret; - tail call void @llvm.nvvm.cp.async.bulk.shared.cta.to.global.bytemask(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %size, i64 %ch, i1 1, i16 %mask) - tail call void 
@llvm.nvvm.cp.async.bulk.shared.cta.to.global.bytemask(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %size, i64 %ch, i1 0, i16 %mask) - ret void -} diff --git a/llvm/test/CodeGen/NVPTX/cp-async-bulk.ll b/llvm/test/CodeGen/NVPTX/cp-async-bulk.ll index d7f2a5df5547e..77694ac82459a 100644 --- a/llvm/test/CodeGen/NVPTX/cp-async-bulk.ll +++ b/llvm/test/CodeGen/NVPTX/cp-async-bulk.ll @@ -66,8 +66,8 @@ define void @cp_async_bulk_s2g(ptr addrspace(3) %src, ptr addrspace(1) %dst, i32 ; CHECK-PTX64-NEXT: ld.param.b64 %rd1, [cp_async_bulk_s2g_param_0]; ; CHECK-PTX64-NEXT: ld.param.b64 %rd2, [cp_async_bulk_s2g_param_1]; ; CHECK-PTX64-NEXT: ld.param.b32 %r1, [cp_async_bulk_s2g_param_2]; -; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_s2g_param_3]; ; CHECK-PTX64-NEXT: cp.async.bulk.global.shared::cta.bulk_group [%rd2], [%rd1], %r1; +; CHECK-PTX64-NEXT: ld.param.b64 %rd3, [cp_async_bulk_s2g_param_3]; ; CHECK-PTX64-NEXT: cp.async.bulk.global.shared::cta.bulk_group.L2::cache_hint [%rd2], [%rd1], %r1, %rd3; ; CHECK-PTX64-NEXT: ret; ; @@ -80,11 +80,11 @@ define void @cp_async_bulk_s2g(ptr addrspace(3) %src, ptr addrspace(1) %dst, i32 ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r1, [cp_async_bulk_s2g_param_0]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd1, [cp_async_bulk_s2g_param_1]; ; CHECK-PTX-SHARED32-NEXT: ld.param.b32 %r2, [cp_async_bulk_s2g_param_2]; -; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_s2g_param_3]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.global.shared::cta.bulk_group [%rd1], [%r1], %r2; +; CHECK-PTX-SHARED32-NEXT: ld.param.b64 %rd2, [cp_async_bulk_s2g_param_3]; ; CHECK-PTX-SHARED32-NEXT: cp.async.bulk.global.shared::cta.bulk_group.L2::cache_hint [%rd1], [%r1], %r2, %rd2; ; CHECK-PTX-SHARED32-NEXT: ret; - tail call void @llvm.nvvm.cp.async.bulk.shared.cta.to.global(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %size, i64 %ch, i1 0) + tail call void @llvm.nvvm.cp.async.bulk.shared.cta.to.global(ptr addrspace(1) %dst, ptr addrspace(3) %src, 
i32 %size, i64 0, i1 0) tail call void @llvm.nvvm.cp.async.bulk.shared.cta.to.global(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %size, i64 %ch, i1 1) ret void } diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll index 65bcbb8e67156..5f5ad831cb148 100644 --- a/llvm/test/CodeGen/NVPTX/shift-opt.ll +++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll @@ -6,13 +6,15 @@ define i64 @test_or(i64 %x, i32 %y) { ; CHECK-LABEL: test_or( ; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_or_param_0]; -; CHECK-NEXT: ld.param.b32 %rd2, [test_or_param_1]; -; CHECK-NEXT: shr.u64 %rd3, %rd1, 5; -; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2; +; CHECK-NEXT: ld.param.b32 %r1, [test_or_param_1]; +; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32; +; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2; +; CHECK-NEXT: shr.u64 %rd4, %rd3, 5; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; %ext = zext i32 %y to i64 @@ -27,13 +29,15 @@ define i64 @test_or(i64 %x, i32 %y) { define i64 @test_xor(i64 %x, i32 %y) { ; CHECK-LABEL: test_xor( ; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_xor_param_0]; -; CHECK-NEXT: ld.param.b32 %rd2, [test_xor_param_1]; -; CHECK-NEXT: shr.u64 %rd3, %rd1, 5; -; CHECK-NEXT: xor.b64 %rd4, %rd3, %rd2; +; CHECK-NEXT: ld.param.b32 %r1, [test_xor_param_1]; +; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32; +; CHECK-NEXT: xor.b64 %rd3, %rd1, %rd2; +; CHECK-NEXT: shr.u64 %rd4, %rd3, 5; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; %ext = zext i32 %y to i64 @@ -48,13 +52,15 @@ define i64 @test_xor(i64 %x, i32 %y) { define i64 @test_and(i64 %x, i32 %y) { ; CHECK-LABEL: test_and( ; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, 
[test_and_param_0]; -; CHECK-NEXT: ld.param.b32 %rd2, [test_and_param_1]; -; CHECK-NEXT: shr.u64 %rd3, %rd1, 5; -; CHECK-NEXT: and.b64 %rd4, %rd3, %rd2; +; CHECK-NEXT: ld.param.b32 %r1, [test_and_param_1]; +; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32; +; CHECK-NEXT: and.b64 %rd3, %rd1, %rd2; +; CHECK-NEXT: shr.u64 %rd4, %rd3, 5; ; CHECK-NEXT: st.param.b64 [func_retval0], %rd4; ; CHECK-NEXT: ret; %ext = zext i32 %y to i64 @@ -70,19 +76,23 @@ define i64 @test_and(i64 %x, i32 %y) { define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) { ; CHECK-LABEL: test_vec( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<5>; -; CHECK-NEXT: .reg .b32 %r<6>; +; CHECK-NEXT: .reg .b16 %rs<9>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_vec_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [test_vec_param_1]; ; CHECK-NEXT: and.b32 %r3, %r2, 16711935; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; -; CHECK-NEXT: shr.u16 %rs3, %rs2, 5; -; CHECK-NEXT: shr.u16 %rs4, %rs1, 5; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; CHECK-NEXT: shl.b16 %rs3, %rs2, 5; +; CHECK-NEXT: shl.b16 %rs4, %rs1, 5; ; CHECK-NEXT: mov.b32 %r4, {%rs4, %rs3}; -; CHECK-NEXT: or.b32 %r5, %r4, %r3; -; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: or.b32 %r5, %r1, %r4; +; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r5; +; CHECK-NEXT: shr.u16 %rs7, %rs6, 5; +; CHECK-NEXT: shr.u16 %rs8, %rs5, 5; +; CHECK-NEXT: mov.b32 %r6, {%rs8, %rs7}; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; %ext = zext <2 x i8> %y to <2 x i16> %shl = shl <2 x i16> %ext, splat(i16 5) diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index ab73a85bfd7b1..7ee912a2006fd 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -9,7 +9,6 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zba,+zbb,+zbs %s -o - | FileCheck --check-prefixes=CHECK,RV32COMBINEINTOB %s ; RUN: llc -mtriple=riscv32 -mattr=+f %s -o - | 
FileCheck --check-prefixes=CHECK,RV32F %s ; RUN: llc -mtriple=riscv32 -mattr=+d %s -o - | FileCheck --check-prefixes=CHECK,RV32D %s -; RUN: llc -mtriple=riscv32 -mattr=+q %s -o - | FileCheck --check-prefixes=CHECK,RV32Q %s ; RUN: llc -mtriple=riscv32 -mattr=+c %s -o - | FileCheck --check-prefixes=CHECK,RV32C %s ; RUN: llc -mtriple=riscv32 -mattr=+c,+f %s -o - | FileCheck --check-prefixes=CHECK,RV32CF %s ; RUN: llc -mtriple=riscv32 -mattr=+c,+d %s -o - | FileCheck --check-prefixes=CHECK,RV32CD %s @@ -105,7 +104,6 @@ ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcisls %s -o - | FileCheck --check-prefix=RV32XQCISLS %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcisync %s -o - | FileCheck --check-prefix=RV32XQCISYNC %s ; RUN: llc -mtriple=riscv32 -mattr=+xandesperf %s -o - | FileCheck --check-prefix=RV32XANDESPERF %s -; RUN: llc -mtriple=riscv32 -mattr=+xandesvdot %s -o - | FileCheck --check-prefix=RV32XANDESVDOT %s ; RUN: llc -mtriple=riscv32 -mattr=+xandesvpackfph %s -o - | FileCheck --check-prefix=RV32XANDESVPACKFPH %s ; RUN: llc -mtriple=riscv32 -mattr=+zaamo %s -o - | FileCheck --check-prefix=RV32ZAAMO %s ; RUN: llc -mtriple=riscv32 -mattr=+zalrsc %s -o - | FileCheck --check-prefix=RV32ZALRSC %s @@ -183,7 +181,6 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zba,+zbb,+zbs %s -o - | FileCheck --check-prefixes=CHECK,RV64COMBINEINTOB %s ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefixes=CHECK,RV64F %s ; RUN: llc -mtriple=riscv64 -mattr=+d %s -o - | FileCheck --check-prefixes=CHECK,RV64D %s -; RUN: llc -mtriple=riscv64 -mattr=+q %s -o - | FileCheck --check-prefixes=CHECK,RV64Q %s ; RUN: llc -mtriple=riscv64 -mattr=+c %s -o - | FileCheck --check-prefixes=CHECK,RV64C %s ; RUN: llc -mtriple=riscv64 -mattr=+c,+f %s -o - | FileCheck --check-prefixes=CHECK,RV64CF %s ; RUN: llc -mtriple=riscv64 -mattr=+c,+d %s -o - | FileCheck --check-prefixes=CHECK,RV64CD %s @@ -258,7 +255,6 @@ ; RUN: llc -mtriple=riscv64 -mattr=+xtheadsync %s -o - | FileCheck 
--check-prefix=RV64XTHEADSYNC %s ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s ; RUN: llc -mtriple=riscv64 -mattr=+xandesperf %s -o - | FileCheck --check-prefix=RV64XANDESPERF %s -; RUN: llc -mtriple=riscv64 -mattr=+xandesvdot %s -o - | FileCheck --check-prefix=RV64XANDESVDOT %s ; RUN: llc -mtriple=riscv64 -mattr=+xandesvpackfph %s -o - | FileCheck --check-prefix=RV64XANDESVPACKFPH %s ; RUN: llc -mtriple=riscv64 -mattr=+za64rs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZA64RS %s ; RUN: llc -mtriple=riscv64 -mattr=+za128rs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZA128RS %s @@ -358,7 +354,6 @@ ; RV32COMBINEINTOB: .attribute 5, "rv32i2p1_b1p0_zba1p0_zbb1p0_zbs1p0" ; RV32F: .attribute 5, "rv32i2p1_f2p2_zicsr2p0" ; RV32D: .attribute 5, "rv32i2p1_f2p2_d2p2_zicsr2p0" -; RV32Q: .attribute 5, "rv32i2p1_f2p2_d2p2_q2p2_zicsr2p0" ; RV32C: .attribute 5, "rv32i2p1_c2p0_zca1p0" ; RV32CF: .attribute 5, "rv32i2p1_f2p2_c2p0_zicsr2p0_zca1p0_zcf1p0" ; RV32CD: .attribute 5, "rv32i2p1_f2p2_d2p2_c2p0_zicsr2p0_zca1p0_zcd1p0_zcf1p0" @@ -454,7 +449,6 @@ ; RV32XQCISLS: .attribute 5, "rv32i2p1_xqcisls0p2" ; RV32XQCISYNC: attribute 5, "rv32i2p1_zca1p0_xqcisync0p3" ; RV32XANDESPERF: .attribute 5, "rv32i2p1_xandesperf5p0" -; RV32XANDESVDOT: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvl32b1p0_xandesvdot5p0" ; RV32XANDESVPACKFPH: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfhmin1p0_zvl32b1p0_xandesvpackfph5p0" ; RV32ZAAMO: .attribute 5, "rv32i2p1_zaamo1p0" ; RV32ZALRSC: .attribute 5, "rv32i2p1_zalrsc1p0" @@ -529,7 +523,6 @@ ; RV64COMBINEINTOB: .attribute 5, "rv64i2p1_b1p0_zba1p0_zbb1p0_zbs1p0" ; RV64F: .attribute 5, "rv64i2p1_f2p2_zicsr2p0" ; RV64D: .attribute 5, "rv64i2p1_f2p2_d2p2_zicsr2p0" -; RV64Q: .attribute 5, "rv64i2p1_f2p2_d2p2_q2p2_zicsr2p0" ; RV64C: .attribute 5, "rv64i2p1_c2p0_zca1p0" ; RV64CF: .attribute 5, "rv64i2p1_f2p2_c2p0_zicsr2p0_zca1p0" ; RV64CD: .attribute 5, 
"rv64i2p1_f2p2_d2p2_c2p0_zicsr2p0_zca1p0_zcd1p0" @@ -608,7 +601,6 @@ ; RV64XTHEADSYNC: .attribute 5, "rv64i2p1_xtheadsync1p0" ; RV64XTHEADVDOT: .attribute 5, "rv64i2p1_f2p2_d2p2_v1p0_zicsr2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_xtheadvdot1p0" ; RV64XANDESPERF: .attribute 5, "rv64i2p1_xandesperf5p0" -; RV64XANDESVDOT: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvl32b1p0_xandesvdot5p0" ; RV64XANDESVPACKFPH: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfhmin1p0_zvl32b1p0_xandesvpackfph5p0" ; RV64ZTSO: .attribute 5, "rv64i2p1_ztso1p0" ; RV64ZAAMO: .attribute 5, "rv64i2p1_zaamo1p0" diff --git a/llvm/test/CodeGen/RISCV/double-calling-conv.ll b/llvm/test/CodeGen/RISCV/double-calling-conv.ll index 1a01fceca75a5..798eac64e9fc2 100644 --- a/llvm/test/CodeGen/RISCV/double-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/double-calling-conv.ll @@ -165,10 +165,10 @@ define double @callee_double_stack(i64 %a, i64 %b, i64 %c, i64 %d, double %e, do ; ; RV32IZFINXZDINX-LABEL: callee_double_stack: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) -; RV32IZFINXZDINX-NEXT: lw a3, 4(sp) ; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw a2, 0(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 4(sp) ; RV32IZFINXZDINX-NEXT: fadd.d a0, a2, a0 ; RV32IZFINXZDINX-NEXT: ret %1 = fadd double %e, %f diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index 0716650374d0d..03ab83ece8ce7 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -734,42 +734,38 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_0) -; 
RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI12_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI12_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) ; RV32IZFINXZDINX-NEXT: mv s1, a1 ; RV32IZFINXZDINX-NEXT: mv s0, a0 -; RV32IZFINXZDINX-NEXT: fle.d s2, a4, s0 ; RV32IZFINXZDINX-NEXT: call __fixdfdi -; RV32IZFINXZDINX-NEXT: lui a3, 524288 +; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_0) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_0)(a2) +; RV32IZFINXZDINX-NEXT: fle.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: lui a4, 524288 ; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz s2, .LBB12_2 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB12_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: # %start ; RV32IZFINXZDINX-NEXT: mv a2, a1 ; RV32IZFINXZDINX-NEXT: .LBB12_2: # %start ; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI12_1) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI12_1)(a1) -; RV32IZFINXZDINX-NEXT: addi a1, a1, %lo(.LCPI12_1) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a1) -; RV32IZFINXZDINX-NEXT: flt.d a1, a4, s0 +; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI12_1)(a1) +; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI12_1+4)(a1) +; RV32IZFINXZDINX-NEXT: flt.d a1, a6, s0 ; RV32IZFINXZDINX-NEXT: beqz a1, .LBB12_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a3, -1 +; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 ; RV32IZFINXZDINX-NEXT: .LBB12_4: # %start -; RV32IZFINXZDINX-NEXT: feq.d a3, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a4, a1 -; RV32IZFINXZDINX-NEXT: neg a1, s2 +; RV32IZFINXZDINX-NEXT: feq.d a4, s0, s0 ; RV32IZFINXZDINX-NEXT: neg a3, a3 -; RV32IZFINXZDINX-NEXT: and a0, a1, a0 -; RV32IZFINXZDINX-NEXT: and a1, a3, a2 -; RV32IZFINXZDINX-NEXT: or a0, a4, a0 +; RV32IZFINXZDINX-NEXT: neg a5, a1 +; RV32IZFINXZDINX-NEXT: neg a4, a4 ; RV32IZFINXZDINX-NEXT: and a0, a3, a0 +; RV32IZFINXZDINX-NEXT: and a1, a4, a2 +; RV32IZFINXZDINX-NEXT: or a0, a5, a0 +; RV32IZFINXZDINX-NEXT: and a0, a4, a0 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; 
RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; @@ -990,13 +986,12 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero ; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI14_0) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI14_0)(a4) -; RV32IZFINXZDINX-NEXT: addi a3, a4, %lo(.LCPI14_0) -; RV32IZFINXZDINX-NEXT: lw a7, 4(a3) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a4) ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: and a1, a2, a1 -; RV32IZFINXZDINX-NEXT: flt.d a2, a6, s0 +; RV32IZFINXZDINX-NEXT: flt.d a2, a4, s0 ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: or a0, a2, a0 ; RV32IZFINXZDINX-NEXT: or a1, a2, a1 @@ -1658,19 +1653,17 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV32IZFINXZDINX-LABEL: fcvt_w_s_sat_i16: ; RV32IZFINXZDINX: # %bb.0: # %start ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI26_0) -; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI26_1) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI26_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI26_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI26_1)(a3) -; RV32IZFINXZDINX-NEXT: addi a3, a3, %lo(.LCPI26_1) -; RV32IZFINXZDINX-NEXT: lw a3, 4(a3) -; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0 -; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4 -; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a2 -; RV32IZFINXZDINX-NEXT: fcvt.w.d a0, a0, rtz -; RV32IZFINXZDINX-NEXT: neg a1, a6 -; RV32IZFINXZDINX-NEXT: and a0, a1, a0 +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI26_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI26_0)(a2) +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI26_1) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI26_1+4)(a4) 
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI26_1)(a4) +; RV32IZFINXZDINX-NEXT: fmax.d a2, a0, a2 +; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV32IZFINXZDINX-NEXT: neg a0, a0 +; RV32IZFINXZDINX-NEXT: fmin.d a2, a2, a4 +; RV32IZFINXZDINX-NEXT: fcvt.w.d a1, a2, rtz +; RV32IZFINXZDINX-NEXT: and a0, a0, a1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i16: @@ -1857,12 +1850,11 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind { ; RV32IZFINXZDINX-LABEL: fcvt_wu_s_sat_i16: ; RV32IZFINXZDINX: # %bb.0: # %start ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI28_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI28_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI28_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero -; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a2 -; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4 +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI28_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI28_0)(a2) +; RV32IZFINXZDINX-NEXT: fcvt.d.w a4, zero +; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4 +; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a2 ; RV32IZFINXZDINX-NEXT: fcvt.wu.d a0, a0, rtz ; RV32IZFINXZDINX-NEXT: ret ; @@ -2036,19 +2028,17 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV32IZFINXZDINX-LABEL: fcvt_w_s_sat_i8: ; RV32IZFINXZDINX: # %bb.0: # %start ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI30_0) -; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI30_1) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI30_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI30_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI30_1)(a3) -; RV32IZFINXZDINX-NEXT: addi a3, a3, %lo(.LCPI30_1) -; RV32IZFINXZDINX-NEXT: lw a3, 4(a3) -; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0 -; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4 -; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a2 -; RV32IZFINXZDINX-NEXT: fcvt.w.d a0, a0, rtz -; RV32IZFINXZDINX-NEXT: neg a1, a6 -; RV32IZFINXZDINX-NEXT: and a0, a1, a0 +; RV32IZFINXZDINX-NEXT: lw a3, 
%lo(.LCPI30_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI30_0)(a2) +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI30_1) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI30_1+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI30_1)(a4) +; RV32IZFINXZDINX-NEXT: fmax.d a2, a0, a2 +; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0 +; RV32IZFINXZDINX-NEXT: neg a0, a0 +; RV32IZFINXZDINX-NEXT: fmin.d a2, a2, a4 +; RV32IZFINXZDINX-NEXT: fcvt.w.d a1, a2, rtz +; RV32IZFINXZDINX-NEXT: and a0, a0, a1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i8: @@ -2234,12 +2224,11 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind { ; RV32IZFINXZDINX-LABEL: fcvt_wu_s_sat_i8: ; RV32IZFINXZDINX: # %bb.0: # %start ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI32_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI32_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI32_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero -; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a2 -; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4 +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI32_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI32_0)(a2) +; RV32IZFINXZDINX-NEXT: fcvt.d.w a4, zero +; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4 +; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a2 ; RV32IZFINXZDINX-NEXT: fcvt.wu.d a0, a0, rtz ; RV32IZFINXZDINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll index d134f8ff16149..5350a28005738 100644 --- a/llvm/test/CodeGen/RISCV/double-imm.ll +++ b/llvm/test/CodeGen/RISCV/double-imm.ll @@ -55,10 +55,9 @@ define double @double_imm_op(double %a) nounwind { ; CHECKRV32ZDINX-LABEL: double_imm_op: ; CHECKRV32ZDINX: # %bb.0: ; CHECKRV32ZDINX-NEXT: lui a2, %hi(.LCPI1_0) -; CHECKRV32ZDINX-NEXT: lw a4, %lo(.LCPI1_0)(a2) -; CHECKRV32ZDINX-NEXT: addi a2, a2, %lo(.LCPI1_0) -; CHECKRV32ZDINX-NEXT: lw a5, 4(a2) -; CHECKRV32ZDINX-NEXT: fadd.d a0, a0, a4 +; CHECKRV32ZDINX-NEXT: lw a3, %lo(.LCPI1_0+4)(a2) +; CHECKRV32ZDINX-NEXT: lw a2, 
%lo(.LCPI1_0)(a2) +; CHECKRV32ZDINX-NEXT: fadd.d a0, a0, a2 ; CHECKRV32ZDINX-NEXT: ret ; ; CHECKRV64ZDINX-LABEL: double_imm_op: diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll index 0d02ce127c325..dba9489e7511d 100644 --- a/llvm/test/CodeGen/RISCV/double-mem.ll +++ b/llvm/test/CodeGen/RISCV/double-mem.ll @@ -7,8 +7,6 @@ ; RUN: -target-abi=ilp32 | FileCheck -check-prefixes=RV32IZFINXZDINX %s ; RUN: llc -mtriple=riscv64 -mattr=+zdinx -verify-machineinstrs < %s \ ; RUN: -target-abi=lp64 | FileCheck -check-prefixes=RV64IZFINXZDINX %s -; RUN: llc -mtriple=riscv32 -mattr=+zdinx,+zilsd -verify-machineinstrs < %s \ -; RUN: -target-abi=ilp32 | FileCheck -check-prefixes=RV32IZFINXZDINXZILSD %s define dso_local double @fld(ptr %a) nounwind { ; CHECKIFD-LABEL: fld: @@ -20,9 +18,9 @@ define dso_local double @fld(ptr %a) nounwind { ; ; RV32IZFINXZDINX-LABEL: fld: ; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: lw a2, 0(a0) ; RV32IZFINXZDINX-NEXT: lw a3, 4(a0) ; RV32IZFINXZDINX-NEXT: lw a1, 28(a0) -; RV32IZFINXZDINX-NEXT: lw a2, 0(a0) ; RV32IZFINXZDINX-NEXT: lw a0, 24(a0) ; RV32IZFINXZDINX-NEXT: fadd.d a0, a2, a0 ; RV32IZFINXZDINX-NEXT: ret @@ -33,13 +31,6 @@ define dso_local double @fld(ptr %a) nounwind { ; RV64IZFINXZDINX-NEXT: ld a0, 24(a0) ; RV64IZFINXZDINX-NEXT: fadd.d a0, a1, a0 ; RV64IZFINXZDINX-NEXT: ret -; -; RV32IZFINXZDINXZILSD-LABEL: fld: -; RV32IZFINXZDINXZILSD: # %bb.0: -; RV32IZFINXZDINXZILSD-NEXT: ld a2, 0(a0) -; RV32IZFINXZDINXZILSD-NEXT: ld a0, 24(a0) -; RV32IZFINXZDINXZILSD-NEXT: fadd.d a0, a2, a0 -; RV32IZFINXZDINXZILSD-NEXT: ret %1 = load double, ptr %a %2 = getelementptr double, ptr %a, i32 3 %3 = load double, ptr %2 @@ -76,17 +67,6 @@ define dso_local void @fsd(ptr %a, double %b, double %c) nounwind { ; RV64IZFINXZDINX-NEXT: sd a1, 0(a0) ; RV64IZFINXZDINX-NEXT: sd a1, 64(a0) ; RV64IZFINXZDINX-NEXT: ret -; -; RV32IZFINXZDINXZILSD-LABEL: fsd: -; RV32IZFINXZDINXZILSD: # %bb.0: -; RV32IZFINXZDINXZILSD-NEXT: mv a5, 
a4 -; RV32IZFINXZDINXZILSD-NEXT: mv a7, a2 -; RV32IZFINXZDINXZILSD-NEXT: mv a4, a3 -; RV32IZFINXZDINXZILSD-NEXT: mv a6, a1 -; RV32IZFINXZDINXZILSD-NEXT: fadd.d a2, a6, a4 -; RV32IZFINXZDINXZILSD-NEXT: sd a2, 0(a0) -; RV32IZFINXZDINXZILSD-NEXT: sd a2, 64(a0) -; RV32IZFINXZDINXZILSD-NEXT: ret ; Use %b and %c in an FP op to ensure floating point registers are used, even ; for the soft float ABI %1 = fadd double %b, %c @@ -115,13 +95,13 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind { ; RV32IZFINXZDINX: # %bb.0: ; RV32IZFINXZDINX-NEXT: lui a4, %hi(G) ; RV32IZFINXZDINX-NEXT: fadd.d a0, a0, a2 -; RV32IZFINXZDINX-NEXT: lw zero, %lo(G)(a4) -; RV32IZFINXZDINX-NEXT: lw zero, %lo(G+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(G)(a4) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(G+4)(a4) ; RV32IZFINXZDINX-NEXT: addi a2, a4, %lo(G) ; RV32IZFINXZDINX-NEXT: sw a0, %lo(G)(a4) ; RV32IZFINXZDINX-NEXT: sw a1, %lo(G+4)(a4) -; RV32IZFINXZDINX-NEXT: lw zero, 72(a2) -; RV32IZFINXZDINX-NEXT: lw zero, 76(a2) +; RV32IZFINXZDINX-NEXT: lw a4, 72(a2) +; RV32IZFINXZDINX-NEXT: lw a5, 76(a2) ; RV32IZFINXZDINX-NEXT: sw a0, 72(a2) ; RV32IZFINXZDINX-NEXT: sw a1, 76(a2) ; RV32IZFINXZDINX-NEXT: ret @@ -136,17 +116,6 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind { ; RV64IZFINXZDINX-NEXT: ld zero, 72(a2) ; RV64IZFINXZDINX-NEXT: sd a0, 72(a2) ; RV64IZFINXZDINX-NEXT: ret -; -; RV32IZFINXZDINXZILSD-LABEL: fld_fsd_global: -; RV32IZFINXZDINXZILSD: # %bb.0: -; RV32IZFINXZDINXZILSD-NEXT: lui a4, %hi(G) -; RV32IZFINXZDINXZILSD-NEXT: fadd.d a0, a0, a2 -; RV32IZFINXZDINXZILSD-NEXT: ld a2, %lo(G)(a4) -; RV32IZFINXZDINXZILSD-NEXT: addi a2, a4, %lo(G) -; RV32IZFINXZDINXZILSD-NEXT: sd a0, %lo(G)(a4) -; RV32IZFINXZDINXZILSD-NEXT: ld a4, 72(a2) -; RV32IZFINXZDINXZILSD-NEXT: sd a0, 72(a2) -; RV32IZFINXZDINXZILSD-NEXT: ret ; Use %a and %b in an FP op to ensure floating point registers are used, even ; for the soft float ABI %1 = fadd double %a, %b @@ -195,14 +164,6 @@ define 
dso_local double @fld_fsd_constant(double %a) nounwind { ; RV64IZFINXZDINX-NEXT: fadd.d a0, a0, a2 ; RV64IZFINXZDINX-NEXT: sd a0, -273(a1) ; RV64IZFINXZDINX-NEXT: ret -; -; RV32IZFINXZDINXZILSD-LABEL: fld_fsd_constant: -; RV32IZFINXZDINXZILSD: # %bb.0: -; RV32IZFINXZDINXZILSD-NEXT: lui a2, 912092 -; RV32IZFINXZDINXZILSD-NEXT: ld a4, -273(a2) -; RV32IZFINXZDINXZILSD-NEXT: fadd.d a0, a0, a4 -; RV32IZFINXZDINXZILSD-NEXT: sd a0, -273(a2) -; RV32IZFINXZDINXZILSD-NEXT: ret %1 = inttoptr i32 3735928559 to ptr %2 = load volatile double, ptr %1 %3 = fadd double %a, %2 @@ -253,8 +214,8 @@ define dso_local double @fld_stack(double %a) nounwind { ; RV32IZFINXZDINX-NEXT: mv s0, a0 ; RV32IZFINXZDINX-NEXT: addi a0, sp, 8 ; RV32IZFINXZDINX-NEXT: call notdead -; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: fadd.d a0, a0, s0 ; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -276,24 +237,6 @@ define dso_local double @fld_stack(double %a) nounwind { ; RV64IZFINXZDINX-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: addi sp, sp, 32 ; RV64IZFINXZDINX-NEXT: ret -; -; RV32IZFINXZDINXZILSD-LABEL: fld_stack: -; RV32IZFINXZDINXZILSD: # %bb.0: -; RV32IZFINXZDINXZILSD-NEXT: addi sp, sp, -32 -; RV32IZFINXZDINXZILSD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IZFINXZDINXZILSD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINXZILSD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINXZILSD-NEXT: mv s1, a1 -; RV32IZFINXZDINXZILSD-NEXT: mv s0, a0 -; RV32IZFINXZDINXZILSD-NEXT: addi a0, sp, 8 -; RV32IZFINXZDINXZILSD-NEXT: call notdead -; RV32IZFINXZDINXZILSD-NEXT: ld a0, 8(sp) -; RV32IZFINXZDINXZILSD-NEXT: fadd.d a0, a0, s0 -; RV32IZFINXZDINXZILSD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IZFINXZDINXZILSD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINXZILSD-NEXT: lw s1, 20(sp) 
# 4-byte Folded Reload -; RV32IZFINXZDINXZILSD-NEXT: addi sp, sp, 32 -; RV32IZFINXZDINXZILSD-NEXT: ret %1 = alloca double, align 8 call void @notdead(ptr %1) %2 = load double, ptr %1 @@ -350,18 +293,6 @@ define dso_local void @fsd_stack(double %a, double %b) nounwind { ; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV64IZFINXZDINX-NEXT: ret -; -; RV32IZFINXZDINXZILSD-LABEL: fsd_stack: -; RV32IZFINXZDINXZILSD: # %bb.0: -; RV32IZFINXZDINXZILSD-NEXT: addi sp, sp, -16 -; RV32IZFINXZDINXZILSD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IZFINXZDINXZILSD-NEXT: fadd.d a0, a0, a2 -; RV32IZFINXZDINXZILSD-NEXT: sd a0, 0(sp) -; RV32IZFINXZDINXZILSD-NEXT: mv a0, sp -; RV32IZFINXZDINXZILSD-NEXT: call notdead -; RV32IZFINXZDINXZILSD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IZFINXZDINXZILSD-NEXT: addi sp, sp, 16 -; RV32IZFINXZDINXZILSD-NEXT: ret %1 = fadd double %a, %b ; force store from FPR64 %2 = alloca double, align 8 store double %1, ptr %2 @@ -390,14 +321,6 @@ define dso_local void @fsd_trunc(ptr %a, double %b) nounwind noinline optnone { ; RV64IZFINXZDINX-NEXT: fcvt.s.d a1, a1 ; RV64IZFINXZDINX-NEXT: sw a1, 0(a0) ; RV64IZFINXZDINX-NEXT: ret -; -; RV32IZFINXZDINXZILSD-LABEL: fsd_trunc: -; RV32IZFINXZDINXZILSD: # %bb.0: -; RV32IZFINXZDINXZILSD-NEXT: mv a3, a2 -; RV32IZFINXZDINXZILSD-NEXT: mv a2, a1 -; RV32IZFINXZDINXZILSD-NEXT: fcvt.s.d a1, a2 -; RV32IZFINXZDINXZILSD-NEXT: sw a1, 0(a0) -; RV32IZFINXZDINXZILSD-NEXT: ret %1 = fptrunc double %b to float store float %1, ptr %a, align 4 ret void diff --git a/llvm/test/CodeGen/RISCV/double-previous-failure.ll b/llvm/test/CodeGen/RISCV/double-previous-failure.ll index c0993faa9584a..c5a7ee79364c6 100644 --- a/llvm/test/CodeGen/RISCV/double-previous-failure.ll +++ b/llvm/test/CodeGen/RISCV/double-previous-failure.ll @@ -51,17 +51,15 @@ define i32 @main() nounwind { ; RV32IZFINXZDINX-NEXT: li a0, 0 ; RV32IZFINXZDINX-NEXT: call test ; RV32IZFINXZDINX-NEXT: lui a2, 
%hi(.LCPI1_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI1_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI1_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: flt.d a2, a0, a4 +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_0)(a2) +; RV32IZFINXZDINX-NEXT: flt.d a2, a0, a2 ; RV32IZFINXZDINX-NEXT: bnez a2, .LBB1_3 ; RV32IZFINXZDINX-NEXT: # %bb.1: # %entry ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_1) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI1_1)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI1_1) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: flt.d a0, a4, a0 +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_1+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_1)(a2) +; RV32IZFINXZDINX-NEXT: flt.d a0, a2, a0 ; RV32IZFINXZDINX-NEXT: bnez a0, .LBB1_3 ; RV32IZFINXZDINX-NEXT: # %bb.2: # %if.end ; RV32IZFINXZDINX-NEXT: call exit diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll index e685d21cc0928..cd87f2d2301d7 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -100,33 +100,31 @@ define i64 @test_floor_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: mv s1, a1 ; RV32IZFINXZDINX-NEXT: call __fixdfdi ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_0) +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI1_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI1_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI1_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fle.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI1_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz a3, .LBB1_2 
+; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: neg a5, a6 +; RV32IZFINXZDINX-NEXT: and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: neg a5, a3 +; RV32IZFINXZDINX-NEXT: or a0, a5, a0 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: beqz a6, .LBB1_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a5, a1 ; RV32IZFINXZDINX-NEXT: .LBB1_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI1_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI1_1)(a1) -; RV32IZFINXZDINX-NEXT: addi a1, a1, %lo(.LCPI1_1) -; RV32IZFINXZDINX-NEXT: lw a7, 4(a1) -; RV32IZFINXZDINX-NEXT: flt.d a1, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a1, .LBB1_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB1_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a5, a4, -1 ; RV32IZFINXZDINX-NEXT: .LBB1_4: -; RV32IZFINXZDINX-NEXT: feq.d a4, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a3, a3 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: neg a4, a4 -; RV32IZFINXZDINX-NEXT: and a0, a3, a0 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: or a0, a5, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a5 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -219,29 +217,25 @@ define i64 @test_floor_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call floor -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI3_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI3_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI3_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) ; RV32IZFINXZDINX-NEXT: mv s0, 
a0 ; RV32IZFINXZDINX-NEXT: mv s1, a1 -; RV32IZFINXZDINX-NEXT: flt.d a0, a4, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI3_0) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI3_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI3_0)(a4) ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: and a1, a2, a1 -; RV32IZFINXZDINX-NEXT: or a0, s2, a0 -; RV32IZFINXZDINX-NEXT: or a1, s2, a1 +; RV32IZFINXZDINX-NEXT: flt.d a2, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: or a1, a2, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; @@ -350,33 +344,31 @@ define i64 @test_ceil_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: mv s1, a1 ; RV32IZFINXZDINX-NEXT: call __fixdfdi ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI5_0) +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI5_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI5_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI5_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fle.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI5_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz a3, .LBB5_2 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: neg a5, a6 +; RV32IZFINXZDINX-NEXT: 
and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: neg a5, a3 +; RV32IZFINXZDINX-NEXT: or a0, a5, a0 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: beqz a6, .LBB5_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a5, a1 ; RV32IZFINXZDINX-NEXT: .LBB5_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI5_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI5_1)(a1) -; RV32IZFINXZDINX-NEXT: addi a1, a1, %lo(.LCPI5_1) -; RV32IZFINXZDINX-NEXT: lw a7, 4(a1) -; RV32IZFINXZDINX-NEXT: flt.d a1, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a1, .LBB5_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB5_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a5, a4, -1 ; RV32IZFINXZDINX-NEXT: .LBB5_4: -; RV32IZFINXZDINX-NEXT: feq.d a4, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a3, a3 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: neg a4, a4 -; RV32IZFINXZDINX-NEXT: and a0, a3, a0 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: or a0, a5, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a5 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -469,29 +461,25 @@ define i64 @test_ceil_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call ceil -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI7_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI7_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI7_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) ; RV32IZFINXZDINX-NEXT: mv s0, a0 ; RV32IZFINXZDINX-NEXT: mv s1, a1 -; RV32IZFINXZDINX-NEXT: flt.d a0, a4, s0 -; 
RV32IZFINXZDINX-NEXT: neg s2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI7_0) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI7_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI7_0)(a4) ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: and a1, a2, a1 -; RV32IZFINXZDINX-NEXT: or a0, s2, a0 -; RV32IZFINXZDINX-NEXT: or a1, s2, a1 +; RV32IZFINXZDINX-NEXT: flt.d a2, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: or a1, a2, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; @@ -600,33 +588,31 @@ define i64 @test_trunc_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: mv s1, a1 ; RV32IZFINXZDINX-NEXT: call __fixdfdi ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI9_0) +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI9_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI9_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI9_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fle.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI9_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI9_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz a3, .LBB9_2 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: neg a5, a6 +; RV32IZFINXZDINX-NEXT: and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: neg a5, a3 +; RV32IZFINXZDINX-NEXT: or 
a0, a5, a0 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: beqz a6, .LBB9_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a5, a1 ; RV32IZFINXZDINX-NEXT: .LBB9_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI9_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI9_1)(a1) -; RV32IZFINXZDINX-NEXT: addi a1, a1, %lo(.LCPI9_1) -; RV32IZFINXZDINX-NEXT: lw a7, 4(a1) -; RV32IZFINXZDINX-NEXT: flt.d a1, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a1, .LBB9_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB9_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a5, a4, -1 ; RV32IZFINXZDINX-NEXT: .LBB9_4: -; RV32IZFINXZDINX-NEXT: feq.d a4, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a3, a3 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: neg a4, a4 -; RV32IZFINXZDINX-NEXT: and a0, a3, a0 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: or a0, a5, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a5 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -719,29 +705,25 @@ define i64 @test_trunc_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call trunc -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI11_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI11_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI11_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) ; RV32IZFINXZDINX-NEXT: mv s0, a0 ; RV32IZFINXZDINX-NEXT: mv s1, a1 -; RV32IZFINXZDINX-NEXT: flt.d a0, a4, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: 
call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI11_0) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI11_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI11_0)(a4) ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: and a1, a2, a1 -; RV32IZFINXZDINX-NEXT: or a0, s2, a0 -; RV32IZFINXZDINX-NEXT: or a1, s2, a1 +; RV32IZFINXZDINX-NEXT: flt.d a2, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: or a1, a2, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; @@ -850,33 +832,31 @@ define i64 @test_round_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: mv s1, a1 ; RV32IZFINXZDINX-NEXT: call __fixdfdi ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI13_0) +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI13_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI13_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI13_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fle.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI13_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz a3, .LBB13_2 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: neg a5, a6 +; RV32IZFINXZDINX-NEXT: and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: neg a5, a3 +; RV32IZFINXZDINX-NEXT: or a0, a5, a0 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: beqz 
a6, .LBB13_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a5, a1 ; RV32IZFINXZDINX-NEXT: .LBB13_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI13_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI13_1)(a1) -; RV32IZFINXZDINX-NEXT: addi a1, a1, %lo(.LCPI13_1) -; RV32IZFINXZDINX-NEXT: lw a7, 4(a1) -; RV32IZFINXZDINX-NEXT: flt.d a1, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a1, .LBB13_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB13_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a5, a4, -1 ; RV32IZFINXZDINX-NEXT: .LBB13_4: -; RV32IZFINXZDINX-NEXT: feq.d a4, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a3, a3 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: neg a4, a4 -; RV32IZFINXZDINX-NEXT: and a0, a3, a0 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: or a0, a5, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a5 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -969,29 +949,25 @@ define i64 @test_round_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call round -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI15_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI15_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI15_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) ; RV32IZFINXZDINX-NEXT: mv s0, a0 ; RV32IZFINXZDINX-NEXT: mv s1, a1 -; RV32IZFINXZDINX-NEXT: flt.d a0, a4, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; 
RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI15_0) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI15_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI15_0)(a4) ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: and a1, a2, a1 -; RV32IZFINXZDINX-NEXT: or a0, s2, a0 -; RV32IZFINXZDINX-NEXT: or a1, s2, a1 +; RV32IZFINXZDINX-NEXT: flt.d a2, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: or a1, a2, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1100,33 +1076,31 @@ define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: mv s1, a1 ; RV32IZFINXZDINX-NEXT: call __fixdfdi ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI17_0) +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI17_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI17_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI17_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fle.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI17_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz a3, .LBB17_2 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: neg a5, a6 +; RV32IZFINXZDINX-NEXT: and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: neg a5, a3 +; RV32IZFINXZDINX-NEXT: or a0, a5, a0 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: beqz a6, .LBB17_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; 
RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv a5, a1 ; RV32IZFINXZDINX-NEXT: .LBB17_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI17_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI17_1)(a1) -; RV32IZFINXZDINX-NEXT: addi a1, a1, %lo(.LCPI17_1) -; RV32IZFINXZDINX-NEXT: lw a7, 4(a1) -; RV32IZFINXZDINX-NEXT: flt.d a1, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a1, .LBB17_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB17_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a5, a4, -1 ; RV32IZFINXZDINX-NEXT: .LBB17_4: -; RV32IZFINXZDINX-NEXT: feq.d a4, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a3, a3 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: neg a4, a4 -; RV32IZFINXZDINX-NEXT: and a0, a3, a0 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: or a0, a5, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a5 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1219,29 +1193,25 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call roundeven -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI19_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI19_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI19_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) ; RV32IZFINXZDINX-NEXT: mv s0, a0 ; RV32IZFINXZDINX-NEXT: mv s1, a1 -; RV32IZFINXZDINX-NEXT: flt.d a0, a4, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI19_0) ; 
RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI19_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI19_0)(a4) ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: and a1, a2, a1 -; RV32IZFINXZDINX-NEXT: or a0, s2, a0 -; RV32IZFINXZDINX-NEXT: or a1, s2, a1 +; RV32IZFINXZDINX-NEXT: flt.d a2, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: or a1, a2, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; @@ -1350,33 +1320,31 @@ define i64 @test_rint_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: mv s1, a1 ; RV32IZFINXZDINX-NEXT: call __fixdfdi ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_0) +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI21_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI21_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI21_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) -; RV32IZFINXZDINX-NEXT: fle.d a3, a4, s0 +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI21_0+4)(a2) +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 +; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 +; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 -; RV32IZFINXZDINX-NEXT: lui a2, 524288 -; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_2 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: neg a5, a6 +; RV32IZFINXZDINX-NEXT: and a0, a5, a0 +; RV32IZFINXZDINX-NEXT: neg a5, a3 +; RV32IZFINXZDINX-NEXT: or a0, a5, a0 +; RV32IZFINXZDINX-NEXT: lui a5, 524288 +; RV32IZFINXZDINX-NEXT: beqz a6, .LBB21_2 ; RV32IZFINXZDINX-NEXT: # %bb.1: -; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: mv 
a5, a1 ; RV32IZFINXZDINX-NEXT: .LBB21_2: -; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI21_1) -; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI21_1)(a1) -; RV32IZFINXZDINX-NEXT: addi a1, a1, %lo(.LCPI21_1) -; RV32IZFINXZDINX-NEXT: lw a7, 4(a1) -; RV32IZFINXZDINX-NEXT: flt.d a1, a6, s0 -; RV32IZFINXZDINX-NEXT: beqz a1, .LBB21_4 +; RV32IZFINXZDINX-NEXT: and a0, a2, a0 +; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_4 ; RV32IZFINXZDINX-NEXT: # %bb.3: -; RV32IZFINXZDINX-NEXT: addi a2, a4, -1 +; RV32IZFINXZDINX-NEXT: addi a5, a4, -1 ; RV32IZFINXZDINX-NEXT: .LBB21_4: -; RV32IZFINXZDINX-NEXT: feq.d a4, s0, s0 -; RV32IZFINXZDINX-NEXT: neg a3, a3 -; RV32IZFINXZDINX-NEXT: neg a5, a1 -; RV32IZFINXZDINX-NEXT: neg a4, a4 -; RV32IZFINXZDINX-NEXT: and a0, a3, a0 -; RV32IZFINXZDINX-NEXT: and a1, a4, a2 -; RV32IZFINXZDINX-NEXT: or a0, a5, a0 -; RV32IZFINXZDINX-NEXT: and a0, a4, a0 +; RV32IZFINXZDINX-NEXT: and a1, a2, a5 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1469,29 +1437,25 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: call rint -; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI23_0) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI23_0)(a2) -; RV32IZFINXZDINX-NEXT: addi a2, a2, %lo(.LCPI23_0) -; RV32IZFINXZDINX-NEXT: lw a5, 4(a2) ; RV32IZFINXZDINX-NEXT: mv s0, a0 ; RV32IZFINXZDINX-NEXT: mv s1, a1 -; RV32IZFINXZDINX-NEXT: flt.d a0, a4, s0 -; RV32IZFINXZDINX-NEXT: neg s2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call __fixunsdfdi ; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI23_0) ; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0 +; RV32IZFINXZDINX-NEXT: lw a5, 
%lo(.LCPI23_0+4)(a4) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI23_0)(a4) ; RV32IZFINXZDINX-NEXT: neg a2, a2 ; RV32IZFINXZDINX-NEXT: and a0, a2, a0 ; RV32IZFINXZDINX-NEXT: and a1, a2, a1 -; RV32IZFINXZDINX-NEXT: or a0, s2, a0 -; RV32IZFINXZDINX-NEXT: or a1, s2, a1 +; RV32IZFINXZDINX-NEXT: flt.d a2, a4, s0 +; RV32IZFINXZDINX-NEXT: neg a2, a2 +; RV32IZFINXZDINX-NEXT: or a0, a2, a0 +; RV32IZFINXZDINX-NEXT: or a1, a2, a1 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 00311bab50836..cdbb6e6425189 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -74,7 +74,6 @@ ; CHECK-NEXT: predictable-select-expensive - Prefer likely predicted branches over selects. ; CHECK-NEXT: prefer-vsetvli-over-read-vlenb - Prefer vsetvli over read vlenb CSR to calculate VLEN. ; CHECK-NEXT: prefer-w-inst - Prefer instructions with W suffix. -; CHECK-NEXT: q - 'Q' (Quad-Precision Floating-Point). ; CHECK-NEXT: relax - Enable Linker relaxation.. ; CHECK-NEXT: reserve-x1 - Reserve X1. ; CHECK-NEXT: reserve-x10 - Reserve X10. @@ -172,7 +171,6 @@ ; CHECK-NEXT: ventana-veyron - Ventana Veyron-Series processors. ; CHECK-NEXT: vxrm-pipeline-flush - VXRM writes causes pipeline flush. ; CHECK-NEXT: xandesperf - 'XAndesPerf' (Andes Performance Extension). -; CHECK-NEXT: xandesvdot - 'XAndesVDot' (Andes Vector Dot Product Extension). ; CHECK-NEXT: xandesvpackfph - 'XAndesVPackFPH' (Andes Vector Packed FP16 Extension). ; CHECK-NEXT: xcvalu - 'XCValu' (CORE-V ALU Operations). ; CHECK-NEXT: xcvbi - 'XCVbi' (CORE-V Immediate Branching). 
diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll deleted file mode 100644 index feceacd90e5f0..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadb.ll +++ /dev/null @@ -1,299 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvpackfph \ -; RUN: -verify-machineinstrs -target-abi=ilp32f | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvpackfph \ -; RUN: -verify-machineinstrs -target-abi=lp64f | FileCheck %s - -declare @llvm.riscv.nds.vfpmadb.nxv1f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadb_vf_nxv1f16_nxv1f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv1f16_nxv1f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.nxv1f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.mask.nxv1f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadb_mask_vf_nxv1f16_nxv1f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv1f16_nxv1f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v9, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.mask.nxv1f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.nxv2f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadb_vf_nxv2f16_nxv2f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: 
intrinsic_vfpmadb_vf_nxv2f16_nxv2f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.nxv2f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.mask.nxv2f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadb_mask_vf_nxv2f16_nxv2f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv2f16_nxv2f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v9, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.mask.nxv2f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.nxv4f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadb_vf_nxv4f16_nxv4f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv4f16_nxv4f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.nxv4f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.mask.nxv4f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadb_mask_vf_nxv4f16_nxv4f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv4f16_nxv4f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v9, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call 
@llvm.riscv.nds.vfpmadb.mask.nxv4f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.nxv8f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadb_vf_nxv8f16_nxv8f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv8f16_nxv8f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.nxv8f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.mask.nxv8f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadb_mask_vf_nxv8f16_nxv8f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv8f16_nxv8f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v10, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.mask.nxv8f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.nxv16f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadb_vf_nxv16f16_nxv16f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv16f16_nxv16f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.nxv16f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.mask.nxv16f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadb_mask_vf_nxv16f16_nxv16f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { 
-; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv16f16_nxv16f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v12, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.mask.nxv16f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.nxv32f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadb_vf_nxv32f16_nxv32f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv32f16_nxv32f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.nxv32f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadb.mask.nxv32f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadb_mask_vf_nxv32f16_nxv32f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv32f16_nxv32f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: nds.vfpmadb.vf v8, fa0, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadb.mask.nxv32f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll b/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll deleted file mode 100644 index e9d78d2d8b5f5..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/xandesvpackfph-vfpmadt.ll +++ /dev/null @@ -1,299 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvpackfph \ -; RUN: 
-verify-machineinstrs -target-abi=ilp32f | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvpackfph \ -; RUN: -verify-machineinstrs -target-abi=lp64f | FileCheck %s - -declare @llvm.riscv.nds.vfpmadt.nxv1f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadt_vf_nxv1f16_nxv1f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv1f16_nxv1f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.nxv1f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.mask.nxv1f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadt_mask_vf_nxv1f16_nxv1f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv1f16_nxv1f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v9, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.mask.nxv1f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.nxv2f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadt_vf_nxv2f16_nxv2f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv2f16_nxv2f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.nxv2f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.mask.nxv2f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define 
@intrinsic_vfpmadt_mask_vf_nxv2f16_nxv2f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv2f16_nxv2f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v9, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.mask.nxv2f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.nxv4f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadt_vf_nxv4f16_nxv4f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv4f16_nxv4f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.nxv4f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.mask.nxv4f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadt_mask_vf_nxv4f16_nxv4f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv4f16_nxv4f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v9, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.mask.nxv4f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.nxv8f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadt_vf_nxv8f16_nxv8f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv8f16_nxv8f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8 
-; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.nxv8f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.mask.nxv8f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadt_mask_vf_nxv8f16_nxv8f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv8f16_nxv8f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v10, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.mask.nxv8f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.nxv16f16.f32( - , - , - float, - iXLen, iXLen); - -define @intrinsic_vfpmadt_vf_nxv16f16_nxv16f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv16f16_nxv16f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.nxv16f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.mask.nxv16f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadt_mask_vf_nxv16f16_nxv16f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv16f16_nxv16f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v12, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.mask.nxv16f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.nxv32f16.f32( - , - , - float, - iXLen, iXLen); - 
-define @intrinsic_vfpmadt_vf_nxv32f16_nxv32f16_f32( %0, float %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_vf_nxv32f16_nxv32f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v8 -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.nxv32f16.f32( - undef, - %0, - float %1, iXLen 0, iXLen %2) - - ret %a -} - -declare @llvm.riscv.nds.vfpmadt.mask.nxv32f16.f32( - , - , - float, - , - iXLen, iXLen, iXLen); - -define @intrinsic_vfpmadt_mask_vf_nxv32f16_nxv32f16_f32( %0, %1, float %2, %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vfpmadt_mask_vf_nxv32f16_nxv32f16_f32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu -; CHECK-NEXT: nds.vfpmadt.vf v8, fa0, v16, v0.t -; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: ret -entry: - %a = tail call @llvm.riscv.nds.vfpmadt.mask.nxv32f16.f32( - %0, - %1, - float %2, - %3, - iXLen 0, iXLen %4, iXLen 1) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll index f9db686c9e855..9a312d9daca8d 100644 --- a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll +++ b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll @@ -9,16 +9,20 @@ define void @foo(ptr nocapture %p, double %d) nounwind { ; RV32ZDINX-LABEL: foo: ; RV32ZDINX: # %bb.0: # %entry -; RV32ZDINX-NEXT: addi a3, a0, 2044 -; RV32ZDINX-NEXT: sw a1, 2044(a0) -; RV32ZDINX-NEXT: sw a2, 4(a3) +; RV32ZDINX-NEXT: mv a3, a2 +; RV32ZDINX-NEXT: addi a0, a0, 2047 +; RV32ZDINX-NEXT: mv a2, a1 +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo: ; RV32ZDINXUALIGNED: # %bb.0: # %entry -; RV32ZDINXUALIGNED-NEXT: addi a3, a0, 2044 -; RV32ZDINXUALIGNED-NEXT: sw a1, 2044(a0) -; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a3) +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; 
RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo: @@ -35,21 +39,21 @@ define void @foo2(ptr nocapture %p, double %d) nounwind { ; RV32ZDINX-LABEL: foo2: ; RV32ZDINX: # %bb.0: # %entry ; RV32ZDINX-NEXT: mv a3, a2 -; RV32ZDINX-NEXT: addi a4, a0, 2044 +; RV32ZDINX-NEXT: addi a0, a0, 2047 ; RV32ZDINX-NEXT: mv a2, a1 ; RV32ZDINX-NEXT: fadd.d a2, a2, a2 -; RV32ZDINX-NEXT: sw a3, 4(a4) -; RV32ZDINX-NEXT: sw a2, 2044(a0) +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo2: ; RV32ZDINXUALIGNED: # %bb.0: # %entry ; RV32ZDINXUALIGNED-NEXT: mv a3, a2 -; RV32ZDINXUALIGNED-NEXT: addi a4, a0, 2044 +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 ; RV32ZDINXUALIGNED-NEXT: mv a2, a1 ; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a2 -; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a4) -; RV32ZDINXUALIGNED-NEXT: sw a2, 2044(a0) +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo2: @@ -70,21 +74,21 @@ define void @foo3(ptr nocapture %p) nounwind { ; RV32ZDINX-LABEL: foo3: ; RV32ZDINX: # %bb.0: # %entry ; RV32ZDINX-NEXT: lui a1, %hi(d) -; RV32ZDINX-NEXT: lw a2, %lo(d+4)(a1) -; RV32ZDINX-NEXT: lw a1, %lo(d)(a1) -; RV32ZDINX-NEXT: addi a3, a0, 2044 -; RV32ZDINX-NEXT: sw a2, 4(a3) -; RV32ZDINX-NEXT: sw a1, 2044(a0) +; RV32ZDINX-NEXT: lw a2, %lo(d)(a1) +; RV32ZDINX-NEXT: lw a3, %lo(d+4)(a1) +; RV32ZDINX-NEXT: addi a0, a0, 2047 +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo3: ; RV32ZDINXUALIGNED: # %bb.0: # %entry ; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(d) -; RV32ZDINXUALIGNED-NEXT: lw a2, %lo(d+4)(a1) -; RV32ZDINXUALIGNED-NEXT: lw a1, %lo(d)(a1) -; RV32ZDINXUALIGNED-NEXT: addi a3, a0, 2044 -; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a3) 
-; RV32ZDINXUALIGNED-NEXT: sw a1, 2044(a0) +; RV32ZDINXUALIGNED-NEXT: lw a2, %lo(d)(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, %lo(d+4)(a1) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo3: @@ -104,26 +108,26 @@ define void @foo4(ptr %p) nounwind { ; RV32ZDINX-LABEL: foo4: ; RV32ZDINX: # %bb.0: # %entry ; RV32ZDINX-NEXT: addi sp, sp, -16 -; RV32ZDINX-NEXT: addi a1, a0, 2044 -; RV32ZDINX-NEXT: lw a2, 2044(a0) -; RV32ZDINX-NEXT: lw a1, 4(a1) +; RV32ZDINX-NEXT: addi a1, a0, 2047 +; RV32ZDINX-NEXT: lw a2, -3(a1) +; RV32ZDINX-NEXT: lw a3, 1(a1) ; RV32ZDINX-NEXT: sw a0, 8(sp) ; RV32ZDINX-NEXT: lui a0, %hi(d) ; RV32ZDINX-NEXT: sw a2, %lo(d)(a0) -; RV32ZDINX-NEXT: sw a1, %lo(d+4)(a0) +; RV32ZDINX-NEXT: sw a3, %lo(d+4)(a0) ; RV32ZDINX-NEXT: addi sp, sp, 16 ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo4: ; RV32ZDINXUALIGNED: # %bb.0: # %entry ; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16 -; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2044 -; RV32ZDINXUALIGNED-NEXT: lw a2, 2044(a0) -; RV32ZDINXUALIGNED-NEXT: lw a1, 4(a1) +; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1) ; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp) ; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d) ; RV32ZDINXUALIGNED-NEXT: sw a2, %lo(d)(a0) -; RV32ZDINXUALIGNED-NEXT: sw a1, %lo(d+4)(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, %lo(d+4)(a0) ; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16 ; RV32ZDINXUALIGNED-NEXT: ret ; @@ -149,16 +153,20 @@ entry: define void @foo5(ptr nocapture %p, double %d) nounwind { ; RV32ZDINX-LABEL: foo5: ; RV32ZDINX: # %bb.0: # %entry -; RV32ZDINX-NEXT: addi a3, a0, -2048 -; RV32ZDINX-NEXT: sw a2, -2045(a0) -; RV32ZDINX-NEXT: sw a1, -1(a3) +; RV32ZDINX-NEXT: mv a3, a2 +; RV32ZDINX-NEXT: addi a0, a0, -2048 +; RV32ZDINX-NEXT: mv a2, a1 +; RV32ZDINX-NEXT: sw a2, -1(a0) +; RV32ZDINX-NEXT: sw a3, 3(a0) ; RV32ZDINX-NEXT: 
ret ; ; RV32ZDINXUALIGNED-LABEL: foo5: ; RV32ZDINXUALIGNED: # %bb.0: # %entry -; RV32ZDINXUALIGNED-NEXT: addi a3, a0, -2048 -; RV32ZDINXUALIGNED-NEXT: sw a2, -2045(a0) -; RV32ZDINXUALIGNED-NEXT: sw a1, -1(a3) +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, -2048 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, -1(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 3(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo5: @@ -178,13 +186,12 @@ define void @foo6(ptr %p, double %d) nounwind { ; RV32ZDINX-NEXT: mv a3, a2 ; RV32ZDINX-NEXT: lui a2, %hi(.LCPI5_0) ; RV32ZDINX-NEXT: lw a4, %lo(.LCPI5_0)(a2) -; RV32ZDINX-NEXT: addi a2, a2, %lo(.LCPI5_0) -; RV32ZDINX-NEXT: lw a5, 4(a2) +; RV32ZDINX-NEXT: lw a5, %lo(.LCPI5_0+4)(a2) ; RV32ZDINX-NEXT: mv a2, a1 -; RV32ZDINX-NEXT: addi a1, a0, 2044 ; RV32ZDINX-NEXT: fadd.d a2, a2, a4 -; RV32ZDINX-NEXT: sw a3, 4(a1) -; RV32ZDINX-NEXT: sw a2, 2044(a0) +; RV32ZDINX-NEXT: addi a0, a0, 2047 +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo6: @@ -192,13 +199,12 @@ define void @foo6(ptr %p, double %d) nounwind { ; RV32ZDINXUALIGNED-NEXT: mv a3, a2 ; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(.LCPI5_0) ; RV32ZDINXUALIGNED-NEXT: lw a4, %lo(.LCPI5_0)(a2) -; RV32ZDINXUALIGNED-NEXT: addi a2, a2, %lo(.LCPI5_0) -; RV32ZDINXUALIGNED-NEXT: lw a5, 4(a2) +; RV32ZDINXUALIGNED-NEXT: lw a5, %lo(.LCPI5_0+4)(a2) ; RV32ZDINXUALIGNED-NEXT: mv a2, a1 -; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2044 ; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a4 -; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a1) -; RV32ZDINXUALIGNED-NEXT: sw a2, 2044(a0) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo6: @@ -218,24 +224,30 @@ entry: define void @foo7(ptr nocapture %p) nounwind { ; RV32ZDINX-LABEL: foo7: ; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: addi 
sp, sp, -16 ; RV32ZDINX-NEXT: lui a1, %hi(d) -; RV32ZDINX-NEXT: addi a2, a1, %lo(d) -; RV32ZDINX-NEXT: lw a1, %lo(d+4)(a1) -; RV32ZDINX-NEXT: lw a2, 8(a2) -; RV32ZDINX-NEXT: addi a3, a0, 2044 -; RV32ZDINX-NEXT: sw a1, 2044(a0) -; RV32ZDINX-NEXT: sw a2, 4(a3) +; RV32ZDINX-NEXT: lw a2, %lo(d+4)(a1) +; RV32ZDINX-NEXT: addi a1, a1, %lo(d) +; RV32ZDINX-NEXT: sw a2, 8(sp) +; RV32ZDINX-NEXT: lw a1, 8(a1) +; RV32ZDINX-NEXT: sw a1, 12(sp) +; RV32ZDINX-NEXT: lw a2, 8(sp) +; RV32ZDINX-NEXT: lw a3, 12(sp) +; RV32ZDINX-NEXT: addi a0, a0, 2047 +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) +; RV32ZDINX-NEXT: addi sp, sp, 16 ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo7: ; RV32ZDINXUALIGNED: # %bb.0: # %entry ; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(d) -; RV32ZDINXUALIGNED-NEXT: addi a2, a1, %lo(d) -; RV32ZDINXUALIGNED-NEXT: lw a1, %lo(d+4)(a1) -; RV32ZDINXUALIGNED-NEXT: lw a2, 8(a2) -; RV32ZDINXUALIGNED-NEXT: addi a3, a0, 2044 -; RV32ZDINXUALIGNED-NEXT: sw a1, 2044(a0) -; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a3) +; RV32ZDINXUALIGNED-NEXT: addi a1, a1, %lo(d) +; RV32ZDINXUALIGNED-NEXT: lw a2, 4(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 8(a1) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo7: @@ -260,28 +272,32 @@ define void @foo8(ptr %p) nounwind { ; RV32ZDINX-LABEL: foo8: ; RV32ZDINX: # %bb.0: # %entry ; RV32ZDINX-NEXT: addi sp, sp, -16 +; RV32ZDINX-NEXT: addi a1, a0, 2047 +; RV32ZDINX-NEXT: lw a2, -3(a1) +; RV32ZDINX-NEXT: lw a3, 1(a1) ; RV32ZDINX-NEXT: sw a0, 8(sp) -; RV32ZDINX-NEXT: addi a1, a0, 2044 -; RV32ZDINX-NEXT: lw a0, 2044(a0) -; RV32ZDINX-NEXT: lw a1, 4(a1) -; RV32ZDINX-NEXT: lui a2, %hi(d) -; RV32ZDINX-NEXT: addi a3, a2, %lo(d) -; RV32ZDINX-NEXT: sw a0, %lo(d+4)(a2) -; RV32ZDINX-NEXT: sw a1, 8(a3) +; RV32ZDINX-NEXT: sw a2, 0(sp) +; RV32ZDINX-NEXT: sw a3, 4(sp) +; RV32ZDINX-NEXT: lw a0, 4(sp) +; RV32ZDINX-NEXT: lui a1, 
%hi(d) +; RV32ZDINX-NEXT: addi a2, a1, %lo(d) +; RV32ZDINX-NEXT: sw a0, 8(a2) +; RV32ZDINX-NEXT: lw a0, 0(sp) +; RV32ZDINX-NEXT: sw a0, %lo(d+4)(a1) ; RV32ZDINX-NEXT: addi sp, sp, 16 ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo8: ; RV32ZDINXUALIGNED: # %bb.0: # %entry ; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16 +; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1) ; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp) -; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2044 -; RV32ZDINXUALIGNED-NEXT: lw a0, 2044(a0) -; RV32ZDINXUALIGNED-NEXT: lw a1, 4(a1) -; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(d) -; RV32ZDINXUALIGNED-NEXT: addi a3, a2, %lo(d) -; RV32ZDINXUALIGNED-NEXT: sw a0, %lo(d+4)(a2) -; RV32ZDINXUALIGNED-NEXT: sw a1, 8(a3) +; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(d) +; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 8(a0) ; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16 ; RV32ZDINXUALIGNED-NEXT: ret ; @@ -313,24 +329,30 @@ entry: define void @foo9(ptr nocapture %p) nounwind { ; RV32ZDINX-LABEL: foo9: ; RV32ZDINX: # %bb.0: # %entry +; RV32ZDINX-NEXT: addi sp, sp, -16 ; RV32ZDINX-NEXT: lui a1, %hi(e) -; RV32ZDINX-NEXT: addi a2, a1, %lo(e) -; RV32ZDINX-NEXT: lw a1, %lo(e)(a1) -; RV32ZDINX-NEXT: lw a2, 4(a2) -; RV32ZDINX-NEXT: addi a3, a0, 2044 -; RV32ZDINX-NEXT: sw a1, 2044(a0) -; RV32ZDINX-NEXT: sw a2, 4(a3) +; RV32ZDINX-NEXT: lw a2, %lo(e)(a1) +; RV32ZDINX-NEXT: sw a2, 8(sp) +; RV32ZDINX-NEXT: addi a1, a1, %lo(e) +; RV32ZDINX-NEXT: lw a1, 4(a1) +; RV32ZDINX-NEXT: sw a1, 12(sp) +; RV32ZDINX-NEXT: lw a2, 8(sp) +; RV32ZDINX-NEXT: lw a3, 12(sp) +; RV32ZDINX-NEXT: addi a0, a0, 2047 +; RV32ZDINX-NEXT: sw a2, -3(a0) +; RV32ZDINX-NEXT: sw a3, 1(a0) +; RV32ZDINX-NEXT: addi sp, sp, 16 ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo9: ; RV32ZDINXUALIGNED: # %bb.0: # %entry ; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(e) -; RV32ZDINXUALIGNED-NEXT: addi a2, a1, 
%lo(e) -; RV32ZDINXUALIGNED-NEXT: lw a1, %lo(e)(a1) -; RV32ZDINXUALIGNED-NEXT: lw a2, 4(a2) -; RV32ZDINXUALIGNED-NEXT: addi a3, a0, 2044 -; RV32ZDINXUALIGNED-NEXT: sw a1, 2044(a0) -; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a3) +; RV32ZDINXUALIGNED-NEXT: addi a1, a1, %lo(e) +; RV32ZDINXUALIGNED-NEXT: lw a2, 0(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 4(a1) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo9: @@ -354,28 +376,32 @@ define void @foo10(ptr %p) nounwind { ; RV32ZDINX-LABEL: foo10: ; RV32ZDINX: # %bb.0: # %entry ; RV32ZDINX-NEXT: addi sp, sp, -16 +; RV32ZDINX-NEXT: addi a1, a0, 2047 +; RV32ZDINX-NEXT: lw a2, -3(a1) +; RV32ZDINX-NEXT: lw a3, 1(a1) ; RV32ZDINX-NEXT: sw a0, 8(sp) -; RV32ZDINX-NEXT: lw a1, 2044(a0) -; RV32ZDINX-NEXT: addi a0, a0, 2044 -; RV32ZDINX-NEXT: lw a0, 4(a0) -; RV32ZDINX-NEXT: lui a2, %hi(e) -; RV32ZDINX-NEXT: sw a1, %lo(e)(a2) -; RV32ZDINX-NEXT: addi a1, a2, %lo(e) -; RV32ZDINX-NEXT: sw a0, 4(a1) +; RV32ZDINX-NEXT: sw a2, 0(sp) +; RV32ZDINX-NEXT: sw a3, 4(sp) +; RV32ZDINX-NEXT: lw a0, 4(sp) +; RV32ZDINX-NEXT: lui a1, %hi(e) +; RV32ZDINX-NEXT: addi a2, a1, %lo(e) +; RV32ZDINX-NEXT: sw a0, 4(a2) +; RV32ZDINX-NEXT: lw a0, 0(sp) +; RV32ZDINX-NEXT: sw a0, %lo(e)(a1) ; RV32ZDINX-NEXT: addi sp, sp, 16 ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo10: ; RV32ZDINXUALIGNED: # %bb.0: # %entry ; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16 +; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047 +; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1) +; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1) ; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp) -; RV32ZDINXUALIGNED-NEXT: lw a1, 2044(a0) -; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2044 -; RV32ZDINXUALIGNED-NEXT: lw a0, 4(a0) -; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(e) -; RV32ZDINXUALIGNED-NEXT: sw a1, %lo(e)(a2) -; RV32ZDINXUALIGNED-NEXT: addi a1, a2, %lo(e) -; RV32ZDINXUALIGNED-NEXT: sw a0, 4(a1) +; RV32ZDINXUALIGNED-NEXT: 
lui a0, %hi(e) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(e) +; RV32ZDINXUALIGNED-NEXT: sw a2, 0(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a0) ; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16 ; RV32ZDINXUALIGNED-NEXT: ret ; @@ -404,18 +430,22 @@ entry: define void @foo11(ptr nocapture %p, double %d) nounwind { ; RV32ZDINX-LABEL: foo11: ; RV32ZDINX: # %bb.0: # %entry -; RV32ZDINX-NEXT: addi a0, a0, 2047 -; RV32ZDINX-NEXT: addi a3, a0, 2045 -; RV32ZDINX-NEXT: sw a1, 2045(a0) -; RV32ZDINX-NEXT: sw a2, 4(a3) +; RV32ZDINX-NEXT: mv a3, a2 +; RV32ZDINX-NEXT: lui a2, 1 +; RV32ZDINX-NEXT: add a0, a0, a2 +; RV32ZDINX-NEXT: mv a2, a1 +; RV32ZDINX-NEXT: sw a2, -4(a0) +; RV32ZDINX-NEXT: sw a3, 0(a0) ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo11: ; RV32ZDINXUALIGNED: # %bb.0: # %entry -; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047 -; RV32ZDINXUALIGNED-NEXT: addi a3, a0, 2045 -; RV32ZDINXUALIGNED-NEXT: sw a1, 2045(a0) -; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a3) +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: lui a2, 1 +; RV32ZDINXUALIGNED-NEXT: add a0, a0, a2 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, -4(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 0(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo11: @@ -432,20 +462,24 @@ entry: define void @foo12(ptr nocapture %p, double %d) nounwind { ; RV32ZDINX-LABEL: foo12: ; RV32ZDINX: # %bb.0: # %entry -; RV32ZDINX-NEXT: lui a3, 2 -; RV32ZDINX-NEXT: addi a3, a3, 2047 -; RV32ZDINX-NEXT: add a0, a0, a3 -; RV32ZDINX-NEXT: sw a1, 0(a0) -; RV32ZDINX-NEXT: sw a2, 4(a0) +; RV32ZDINX-NEXT: mv a3, a2 +; RV32ZDINX-NEXT: lui a2, 2 +; RV32ZDINX-NEXT: addi a2, a2, 2047 +; RV32ZDINX-NEXT: add a0, a0, a2 +; RV32ZDINX-NEXT: mv a2, a1 +; RV32ZDINX-NEXT: sw a2, 0(a0) +; RV32ZDINX-NEXT: sw a3, 4(a0) ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo12: ; RV32ZDINXUALIGNED: # %bb.0: # %entry -; RV32ZDINXUALIGNED-NEXT: lui a3, 2 -; RV32ZDINXUALIGNED-NEXT: addi a3, a3, 2047 -; RV32ZDINXUALIGNED-NEXT: add a0, a0, 
a3 -; RV32ZDINXUALIGNED-NEXT: sw a1, 0(a0) -; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a0) +; RV32ZDINXUALIGNED-NEXT: mv a3, a2 +; RV32ZDINXUALIGNED-NEXT: lui a2, 2 +; RV32ZDINXUALIGNED-NEXT: addi a2, a2, 2047 +; RV32ZDINXUALIGNED-NEXT: add a0, a0, a2 +; RV32ZDINXUALIGNED-NEXT: mv a2, a1 +; RV32ZDINXUALIGNED-NEXT: sw a2, 0(a0) +; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo12: @@ -465,16 +499,23 @@ entry: define double @foo13(ptr nocapture %p) nounwind { ; RV32ZDINX-LABEL: foo13: ; RV32ZDINX: # %bb.0: # %entry -; RV32ZDINX-NEXT: lui a1, %hi(f) -; RV32ZDINX-NEXT: lw a0, %lo(f+4)(a1) -; RV32ZDINX-NEXT: lw a1, %lo(f+8)(a1) +; RV32ZDINX-NEXT: addi sp, sp, -16 +; RV32ZDINX-NEXT: lui a0, %hi(f) +; RV32ZDINX-NEXT: lw a1, %lo(f+8)(a0) +; RV32ZDINX-NEXT: sw a1, 12(sp) +; RV32ZDINX-NEXT: lw a0, %lo(f+4)(a0) +; RV32ZDINX-NEXT: sw a0, 8(sp) +; RV32ZDINX-NEXT: lw a0, 8(sp) +; RV32ZDINX-NEXT: lw a1, 12(sp) +; RV32ZDINX-NEXT: addi sp, sp, 16 ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo13: ; RV32ZDINXUALIGNED: # %bb.0: # %entry -; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(f) -; RV32ZDINXUALIGNED-NEXT: lw a0, %lo(f+4)(a1) -; RV32ZDINXUALIGNED-NEXT: lw a1, %lo(f+8)(a1) +; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(f) +; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(f) +; RV32ZDINXUALIGNED-NEXT: lw a1, 8(a0) +; RV32ZDINXUALIGNED-NEXT: lw a0, 4(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo13: @@ -494,16 +535,16 @@ entry: define double @foo14(ptr nocapture %p) nounwind { ; RV32ZDINX-LABEL: foo14: ; RV32ZDINX: # %bb.0: # %entry -; RV32ZDINX-NEXT: lui a1, %hi(f) -; RV32ZDINX-NEXT: lw a0, %lo(f+8)(a1) -; RV32ZDINX-NEXT: lw a1, %lo(f+12)(a1) +; RV32ZDINX-NEXT: lui a0, %hi(f) +; RV32ZDINX-NEXT: lw a1, %lo(f+12)(a0) +; RV32ZDINX-NEXT: lw a0, %lo(f+8)(a0) ; RV32ZDINX-NEXT: ret ; ; RV32ZDINXUALIGNED-LABEL: foo14: ; RV32ZDINXUALIGNED: # %bb.0: # %entry -; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(f) -; RV32ZDINXUALIGNED-NEXT: lw a0, %lo(f+8)(a1) -; 
RV32ZDINXUALIGNED-NEXT: lw a1, %lo(f+12)(a1) +; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(f) +; RV32ZDINXUALIGNED-NEXT: lw a1, %lo(f+12)(a0) +; RV32ZDINXUALIGNED-NEXT: lw a0, %lo(f+8)(a0) ; RV32ZDINXUALIGNED-NEXT: ret ; ; RV64ZDINX-LABEL: foo14: diff --git a/llvm/test/CodeGen/RISCV/zdinx-large-spill.mir b/llvm/test/CodeGen/RISCV/zdinx-large-spill.mir index f8b2b542a497d..caebdab2c95ab 100644 --- a/llvm/test/CodeGen/RISCV/zdinx-large-spill.mir +++ b/llvm/test/CodeGen/RISCV/zdinx-large-spill.mir @@ -10,40 +10,34 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -2048 - ; CHECK-NEXT: addi sp, sp, -32 - ; CHECK-NEXT: .cfi_def_cfa_offset 2080 + ; CHECK-NEXT: addi sp, sp, -16 + ; CHECK-NEXT: .cfi_def_cfa_offset 2064 ; CHECK-NEXT: lui t0, 1 ; CHECK-NEXT: add t0, sp, t0 - ; CHECK-NEXT: sw a0, -2024(t0) # 4-byte Folded Spill - ; CHECK-NEXT: sw a1, -2020(t0) # 4-byte Folded Spill + ; CHECK-NEXT: sw a0, -2040(t0) # 4-byte Folded Spill + ; CHECK-NEXT: sw a1, -2036(t0) # 4-byte Folded Spill ; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: add a0, sp, a0 - ; CHECK-NEXT: sw a2, -2032(a0) # 4-byte Folded Spill - ; CHECK-NEXT: sw a3, -2028(a0) # 4-byte Folded Spill + ; CHECK-NEXT: sw a2, -2048(a0) # 4-byte Folded Spill + ; CHECK-NEXT: sw a3, -2044(a0) # 4-byte Folded Spill + ; CHECK-NEXT: sw a4, 2040(sp) # 4-byte Folded Spill + ; CHECK-NEXT: sw a5, 2044(sp) # 4-byte Folded Spill + ; CHECK-NEXT: sw a6, 2032(sp) # 4-byte Folded Spill + ; CHECK-NEXT: sw a7, 2036(sp) # 4-byte Folded Spill ; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: add a0, sp, a0 - ; CHECK-NEXT: sw a4, -2040(a0) # 4-byte Folded Spill - ; CHECK-NEXT: sw a5, -2036(a0) # 4-byte Folded Spill - ; CHECK-NEXT: addi a0, sp, 2044 - ; CHECK-NEXT: sw a6, 0(a0) # 4-byte Folded Spill - ; CHECK-NEXT: sw a7, 4(a0) # 4-byte Folded Spill + ; CHECK-NEXT: lw a1, -2036(a0) # 4-byte Folded Reload + ; CHECK-NEXT: lw a0, -2040(a0) # 4-byte Folded Reload ; CHECK-NEXT: lui a0, 1 ; CHECK-NEXT: add a0, sp, a0 - ; CHECK-NEXT: lw a1, -2020(a0) # 4-byte 
Folded Reload - ; CHECK-NEXT: lw a0, -2024(a0) # 4-byte Folded Reload - ; CHECK-NEXT: lui a0, 1 - ; CHECK-NEXT: add a0, sp, a0 - ; CHECK-NEXT: lw a2, -2032(a0) # 4-byte Folded Reload - ; CHECK-NEXT: lw a3, -2028(a0) # 4-byte Folded Reload - ; CHECK-NEXT: lui a0, 1 - ; CHECK-NEXT: add a0, sp, a0 - ; CHECK-NEXT: lw a4, -2040(a0) # 4-byte Folded Reload - ; CHECK-NEXT: lw a5, -2036(a0) # 4-byte Folded Reload - ; CHECK-NEXT: addi a0, sp, 2044 - ; CHECK-NEXT: lw a6, 0(a0) # 4-byte Folded Reload - ; CHECK-NEXT: lw a7, 4(a0) # 4-byte Folded Reload + ; CHECK-NEXT: lw a2, -2048(a0) # 4-byte Folded Reload + ; CHECK-NEXT: lw a3, -2044(a0) # 4-byte Folded Reload + ; CHECK-NEXT: lw a4, 2040(sp) # 4-byte Folded Reload + ; CHECK-NEXT: lw a5, 2044(sp) # 4-byte Folded Reload + ; CHECK-NEXT: lw a6, 2032(sp) # 4-byte Folded Reload + ; CHECK-NEXT: lw a7, 2036(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 2032 - ; CHECK-NEXT: addi sp, sp, 48 + ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret ret void @@ -59,9 +53,8 @@ stack: - { id: 0, type: spill-slot, size: 8, alignment: 4 } - { id: 1, type: spill-slot, size: 8, alignment: 4 } - { id: 2, type: spill-slot, size: 8, alignment: 4 } - - { id: 3, type: spill-slot, size: 4, alignment: 4 } - - { id: 4, type: spill-slot, size: 8, alignment: 4 } - - { id: 5, type: spill-slot, size: 2028, alignment: 4 } + - { id: 3, type: spill-slot, size: 8, alignment: 4 } + - { id: 4, type: spill-slot, size: 2024, alignment: 4 } machineFunctionInfo: varArgsFrameIndex: 0 varArgsSaveSize: 0 @@ -72,11 +65,11 @@ body: | PseudoRV32ZdinxSD killed renamable $x10_x11, %stack.0, 0 :: (store (s64) into %stack.0, align 4) PseudoRV32ZdinxSD killed renamable $x12_x13, %stack.1, 0 :: (store (s64) into %stack.1, align 4) PseudoRV32ZdinxSD killed renamable $x14_x15, %stack.2, 0 :: (store (s64) into %stack.2, align 4) - PseudoRV32ZdinxSD killed renamable $x16_x17, %stack.4, 0 :: (store (s64) into %stack.4, align 4) + 
PseudoRV32ZdinxSD killed renamable $x16_x17, %stack.3, 0 :: (store (s64) into %stack.3, align 4) renamable $x10_x11 = PseudoRV32ZdinxLD %stack.0, 0 :: (load (s64) from %stack.0, align 4) renamable $x12_x13 = PseudoRV32ZdinxLD %stack.1, 0 :: (load (s64) from %stack.1, align 4) renamable $x14_x15 = PseudoRV32ZdinxLD %stack.2, 0 :: (load (s64) from %stack.2, align 4) - renamable $x16_x17 = PseudoRV32ZdinxLD %stack.4, 0 :: (load (s64) from %stack.4, align 4) + renamable $x16_x17 = PseudoRV32ZdinxLD %stack.3, 0 :: (load (s64) from %stack.3, align 4) PseudoRET ... diff --git a/llvm/test/CodeGen/RISCV/zdinx-memoperand.ll b/llvm/test/CodeGen/RISCV/zdinx-memoperand.ll index 8cb7b79f3ff6a..d618253912470 100644 --- a/llvm/test/CodeGen/RISCV/zdinx-memoperand.ll +++ b/llvm/test/CodeGen/RISCV/zdinx-memoperand.ll @@ -15,8 +15,9 @@ define i32 @foo(double %x, ptr %y, i64 %0, i64 %1, i1 %cmp6.not, ptr %arrayidx13 ; CHECK-NEXT: .LBB0_2: # %if.then7 ; CHECK-NEXT: lw a0, 0(sp) ; CHECK-NEXT: .LBB0_3: # %common.ret -; CHECK-NEXT: sw zero, 0(a0) -; CHECK-NEXT: sw zero, 4(a0) +; CHECK-NEXT: fcvt.d.w a2, zero +; CHECK-NEXT: sw a2, 0(a0) +; CHECK-NEXT: sw a3, 4(a0) ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/zdinx-spill.ll b/llvm/test/CodeGen/RISCV/zdinx-spill.ll deleted file mode 100644 index d7a700622bf8c..0000000000000 --- a/llvm/test/CodeGen/RISCV/zdinx-spill.ll +++ /dev/null @@ -1,71 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=riscv32 -mattr=+zdinx -verify-machineinstrs -stop-after=prologepilog | FileCheck %s - -declare void @bar() - -define double @foo(double %x) nounwind { - ; CHECK-LABEL: name: foo - ; CHECK: bb.0 (%ir-block.0): - ; CHECK-NEXT: liveins: $x10, $x11, $x8, $x9, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -64 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION 
def_cfa_offset 64 - ; CHECK-NEXT: frame-setup SW killed $x8, $x2, 60 :: (store (s32) into %stack.1) - ; CHECK-NEXT: frame-setup SW killed $x9, $x2, 56 :: (store (s32) into %stack.2) - ; CHECK-NEXT: frame-setup SW killed $x18, $x2, 52 :: (store (s32) into %stack.3) - ; CHECK-NEXT: frame-setup SW killed $x19, $x2, 48 :: (store (s32) into %stack.4) - ; CHECK-NEXT: frame-setup SW killed $x20, $x2, 44 :: (store (s32) into %stack.5) - ; CHECK-NEXT: frame-setup SW killed $x21, $x2, 40 :: (store (s32) into %stack.6) - ; CHECK-NEXT: frame-setup SW killed $x22, $x2, 36 :: (store (s32) into %stack.7) - ; CHECK-NEXT: frame-setup SW killed $x23, $x2, 32 :: (store (s32) into %stack.8) - ; CHECK-NEXT: frame-setup SW killed $x24, $x2, 28 :: (store (s32) into %stack.9) - ; CHECK-NEXT: frame-setup SW killed $x25, $x2, 24 :: (store (s32) into %stack.10) - ; CHECK-NEXT: frame-setup SW killed $x26, $x2, 20 :: (store (s32) into %stack.11) - ; CHECK-NEXT: frame-setup SW killed $x27, $x2, 16 :: (store (s32) into %stack.12) - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x8, -4 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x9, -8 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x18, -12 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x19, -16 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x20, -20 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x21, -24 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x22, -28 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x23, -32 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x24, -36 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x25, -40 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x26, -44 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x27, -48 - ; CHECK-NEXT: renamable $x10_x11 = nofpexcept FADD_D_IN32X killed renamable $x10_x11, renamable $x10_x11, 7, implicit $frm - ; CHECK-NEXT: PseudoRV32ZdinxSD killed renamable $x10_x11, $x2, 8 :: (store (s64) into %stack.0, align 4) - ; CHECK-NEXT: 
INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $x6, 12 /* clobber */, implicit-def dead early-clobber $x7, 12 /* clobber */, implicit-def dead early-clobber $x8, 12 /* clobber */, implicit-def dead early-clobber $x9, 12 /* clobber */, implicit-def dead early-clobber $x10, 12 /* clobber */, implicit-def dead early-clobber $x11, 12 /* clobber */, implicit-def dead early-clobber $x12, 12 /* clobber */, implicit-def dead early-clobber $x13, 12 /* clobber */, implicit-def dead early-clobber $x14, 12 /* clobber */, implicit-def dead early-clobber $x15, 12 /* clobber */, implicit-def dead early-clobber $x16, 12 /* clobber */, implicit-def dead early-clobber $x17, 12 /* clobber */, implicit-def dead early-clobber $x18, 12 /* clobber */, implicit-def dead early-clobber $x19, 12 /* clobber */, implicit-def dead early-clobber $x20, 12 /* clobber */, implicit-def dead early-clobber $x21, 12 /* clobber */, implicit-def dead early-clobber $x22, 12 /* clobber */, implicit-def dead early-clobber $x23, 12 /* clobber */, implicit-def dead early-clobber $x24, 12 /* clobber */, implicit-def dead early-clobber $x25, 12 /* clobber */, implicit-def dead early-clobber $x26, 12 /* clobber */, implicit-def dead early-clobber $x27, 12 /* clobber */, implicit-def dead early-clobber $x28, 12 /* clobber */, implicit-def dead early-clobber $x29, 12 /* clobber */, implicit-def dead early-clobber $x31 - ; CHECK-NEXT: renamable $x10_x11 = PseudoRV32ZdinxLD $x2, 8 :: (load (s64) from %stack.0, align 4) - ; CHECK-NEXT: $x8 = frame-destroy LW $x2, 60 :: (load (s32) from %stack.1) - ; CHECK-NEXT: $x9 = frame-destroy LW $x2, 56 :: (load (s32) from %stack.2) - ; CHECK-NEXT: $x18 = frame-destroy LW $x2, 52 :: (load (s32) from %stack.3) - ; CHECK-NEXT: $x19 = frame-destroy LW $x2, 48 :: (load (s32) from %stack.4) - ; CHECK-NEXT: $x20 = frame-destroy LW $x2, 44 :: (load (s32) from %stack.5) - ; CHECK-NEXT: $x21 = frame-destroy LW $x2, 40 :: (load (s32) from 
%stack.6) - ; CHECK-NEXT: $x22 = frame-destroy LW $x2, 36 :: (load (s32) from %stack.7) - ; CHECK-NEXT: $x23 = frame-destroy LW $x2, 32 :: (load (s32) from %stack.8) - ; CHECK-NEXT: $x24 = frame-destroy LW $x2, 28 :: (load (s32) from %stack.9) - ; CHECK-NEXT: $x25 = frame-destroy LW $x2, 24 :: (load (s32) from %stack.10) - ; CHECK-NEXT: $x26 = frame-destroy LW $x2, 20 :: (load (s32) from %stack.11) - ; CHECK-NEXT: $x27 = frame-destroy LW $x2, 16 :: (load (s32) from %stack.12) - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x8 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x9 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x18 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x19 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x20 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x21 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x22 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x23 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x24 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x25 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x26 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $x27 - ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 64 - ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 - ; CHECK-NEXT: PseudoRET implicit $x10, implicit $x11 - %a = fadd double %x, %x - call void asm sideeffect "", "~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{xr0},~{x31}"() - ret double %a -} diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll deleted file mode 100644 index eb5d8237bda8c..0000000000000 --- a/llvm/test/CodeGen/RISCV/zilsd.ll +++ /dev/null @@ -1,121 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=riscv32 -mattr=+zilsd -verify-machineinstrs < %s \ -; 
RUN: | FileCheck -check-prefixes=CHECK,SLOW %s -; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+unaligned-scalar-mem -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=CHECK,FAST %s - -define i64 @load(ptr %a) nounwind { -; CHECK-LABEL: load: -; CHECK: # %bb.0: -; CHECK-NEXT: ld a2, 80(a0) -; CHECK-NEXT: ld a0, 0(a0) -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: ret - %1 = getelementptr i64, ptr %a, i32 10 - %2 = load i64, ptr %1 - %3 = load volatile i64, ptr %a - ret i64 %2 -} - -define void @store(ptr %a, i64 %b) nounwind { -; CHECK-LABEL: store: -; CHECK: # %bb.0: -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: sd a2, 0(a0) -; CHECK-NEXT: sd a2, 88(a0) -; CHECK-NEXT: ret - store i64 %b, ptr %a - %1 = getelementptr i64, ptr %a, i32 11 - store i64 %b, ptr %1 - ret void -} - -define i64 @load_unaligned(ptr %p) { -; SLOW-LABEL: load_unaligned: -; SLOW: # %bb.0: -; SLOW-NEXT: lbu a1, 1(a0) -; SLOW-NEXT: lbu a2, 2(a0) -; SLOW-NEXT: lbu a3, 3(a0) -; SLOW-NEXT: lbu a4, 0(a0) -; SLOW-NEXT: slli a1, a1, 8 -; SLOW-NEXT: slli a2, a2, 16 -; SLOW-NEXT: slli a3, a3, 24 -; SLOW-NEXT: or a1, a1, a4 -; SLOW-NEXT: lbu a4, 4(a0) -; SLOW-NEXT: lbu a5, 5(a0) -; SLOW-NEXT: or a2, a3, a2 -; SLOW-NEXT: lbu a3, 6(a0) -; SLOW-NEXT: lbu a0, 7(a0) -; SLOW-NEXT: slli a5, a5, 8 -; SLOW-NEXT: or a4, a5, a4 -; SLOW-NEXT: slli a3, a3, 16 -; SLOW-NEXT: slli a0, a0, 24 -; SLOW-NEXT: or a3, a0, a3 -; SLOW-NEXT: or a0, a2, a1 -; SLOW-NEXT: or a1, a3, a4 -; SLOW-NEXT: ret -; -; FAST-LABEL: load_unaligned: -; FAST: # %bb.0: -; FAST-NEXT: ld a0, 0(a0) -; FAST-NEXT: ret - %res = load i64, ptr %p, align 1 - ret i64 %res -} - -define void @store_unaligned(ptr %p, i64 %v) { -; SLOW-LABEL: store_unaligned: -; SLOW: # %bb.0: -; SLOW-NEXT: srli a3, a2, 24 -; SLOW-NEXT: srli a4, a2, 16 -; SLOW-NEXT: srli a5, a2, 8 -; SLOW-NEXT: srli a6, a1, 24 -; SLOW-NEXT: srli a7, a1, 16 -; SLOW-NEXT: sb a2, 4(a0) -; SLOW-NEXT: sb a5, 5(a0) -; SLOW-NEXT: sb a4, 6(a0) -; SLOW-NEXT: 
sb a3, 7(a0) -; SLOW-NEXT: srli a2, a1, 8 -; SLOW-NEXT: sb a1, 0(a0) -; SLOW-NEXT: sb a2, 1(a0) -; SLOW-NEXT: sb a7, 2(a0) -; SLOW-NEXT: sb a6, 3(a0) -; SLOW-NEXT: ret -; -; FAST-LABEL: store_unaligned: -; FAST: # %bb.0: -; FAST-NEXT: mv a3, a2 -; FAST-NEXT: mv a2, a1 -; FAST-NEXT: sd a2, 0(a0) -; FAST-NEXT: ret - store i64 %v, ptr %p, align 1 - ret void -} - -@g = dso_local global i64 0, align 8 - -define i64 @load_g() nounwind { -; CHECK-LABEL: load_g: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lui a0, %hi(g) -; CHECK-NEXT: ld a0, %lo(g)(a0) -; CHECK-NEXT: ret -entry: - %0 = load i64, ptr @g - ret i64 %0 -} - -define void @store_g() nounwind { -; CHECK-LABEL: store_g: -; CHECK: # %bb.0: # %entyr -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: lui a2, %hi(g) -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: sd a0, %lo(g)(a2) -; CHECK-NEXT: ret -entyr: - store i64 0, ptr @g - ret void -} diff --git a/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast-2.ll b/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast-2.ll index d608529b421cc..93208c16ed4a5 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast-2.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast-2.ll @@ -1,6 +1,9 @@ ; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - | FileCheck %s --match-full-lines ; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} +; FIXME(134119): enable this once Offset decorations are added. 
+; XFAIL: spirv-tools + %S2 = type { { [10 x { i32, i32 } ] }, i32 } ; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 diff --git a/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast.ll b/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast.ll index b1446b7529ea4..24a50c7177340 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/resource-addrspacecast.ll @@ -1,6 +1,9 @@ ; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} +; FIXME(134119): enable this once Offset decorations are added. +; XFAIL: spirv-tools + %struct.S = type { i32 } ; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 diff --git a/llvm/test/CodeGen/X86/avg-mask.ll b/llvm/test/CodeGen/X86/avg-mask.ll index b148cd3d42df6..ace422e1a925f 100644 --- a/llvm/test/CodeGen/X86/avg-mask.ll +++ b/llvm/test/CodeGen/X86/avg-mask.ll @@ -7,7 +7,7 @@ define <16 x i8> @avg_v16i8_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %src, i16 ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -35,7 +35,7 @@ define <16 x i8> @avg_v16i8_maskz(<16 x i8> %a, <16 x i8> %b, i16 %mask) nounwin ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpavgb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -64,9 +64,9 @@ define <32 x i8> @avg_v32i8_mask(<32 x i8> %a, <32 x i8> %b, <32 x i8> %src, i32 ; AVX512F-NEXT: shrl $16, %edi ; 
AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: kmovw %edi, %k2 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k2} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 @@ -96,9 +96,9 @@ define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwin ; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: kmovw %edi, %k2 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 {%k2} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0 @@ -137,18 +137,18 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 ; AVX512F-NEXT: kmovw %ecx, %k2 ; AVX512F-NEXT: kmovw %eax, %k3 ; AVX512F-NEXT: kmovw %edi, %k4 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k4} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k3} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm4 {%k2} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm4, %xmm4 ; 
AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 -; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm2 ^ (zmm0 & (zmm1 ^ zmm2)) +; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v64i8_mask: @@ -185,14 +185,14 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin ; AVX512F-NEXT: kmovw %ecx, %k2 ; AVX512F-NEXT: kmovw %eax, %k3 ; AVX512F-NEXT: kmovw %edi, %k4 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k4} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 {%k3} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k2} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 @@ -220,7 +220,7 @@ define <8 x i16> @avg_v8i16_mask(<8 x i16> %a, <8 x i16> %b, <8 x i16> %src, i8 ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -248,7 +248,7 @@ define <8 x i16> @avg_v8i16_maskz(<8 x i16> %a, <8 x i16> %b, i8 %mask) nounwind ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpavgw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, 
%zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -275,7 +275,7 @@ define <16 x i16> @avg_v16i16_mask(<16 x i16> %a, <16 x i16> %b, <16 x i16> %src ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: retq @@ -302,7 +302,7 @@ define <16 x i16> @avg_v16i16_maskz(<16 x i16> %a, <16 x i16> %b, i16 %mask) nou ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq @@ -334,12 +334,12 @@ define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src ; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm1 ; AVX512F-NEXT: kmovw %edi, %k2 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm3 {%k2} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 = zmm2 ^ (zmm0 & (zmm1 ^ zmm2)) +; AVX512F-NEXT: vpternlogq $202, %zmm2, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i16_mask: @@ -370,9 +370,9 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou ; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: kmovw %edi, %k2 
-; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 {%k2} {z} = -1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512F-NEXT: vpandq %zmm0, %zmm1, %zmm0 diff --git a/llvm/test/CodeGen/X86/avgfloors.ll b/llvm/test/CodeGen/X86/avgfloors.ll index 2566860357130..db6f61ed434fd 100644 --- a/llvm/test/CodeGen/X86/avgfloors.ll +++ b/llvm/test/CodeGen/X86/avgfloors.ll @@ -53,7 +53,7 @@ define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64] -; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem) +; AVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0 ; AVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq @@ -108,7 +108,7 @@ define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX512-NEXT: vpbroadcastb {{.*#+}} xmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64] -; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm1 ^ (xmm0 & mem) +; AVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0 ; AVX512-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq @@ -405,7 +405,7 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512-NEXT: vpbroadcastb {{.*#+}} ymm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64] -; AVX512-NEXT: 
vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem) +; AVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0 ; AVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq @@ -478,7 +478,7 @@ define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX512-NEXT: vpbroadcastb {{.*#+}} ymm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64] -; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm1 ^ (ymm0 & mem) +; AVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0 ; AVX512-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq @@ -966,7 +966,7 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpsrlw $1, %zmm0, %zmm0 ; AVX512-NEXT: vpbroadcastb {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64] -; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem) +; AVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 ; AVX512-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq @@ -1078,7 +1078,7 @@ define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpsrlw $1, %zmm0, %zmm0 ; AVX512-NEXT: vpbroadcastb {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64] -; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm1 ^ (zmm0 & mem) +; AVX512-NEXT: vpternlogd $108, 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0 ; AVX512-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll index ae422381c841c..e3c5a5023ac9e 100644 --- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll +++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll @@ -23,7 +23,7 @@ define <4 x i32> @reassociate_and_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> ; AVX512-LABEL: reassociate_and_v4i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 & xmm3 & xmm2 +; AVX512-NEXT: vpternlogd $128, %xmm2, %xmm3, %xmm0 ; AVX512-NEXT: retq %t0 = add <4 x i32> %x0, %x1 @@ -50,7 +50,7 @@ define <4 x i32> @reassociate_or_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> % ; AVX512-LABEL: reassociate_or_v4i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 | xmm3 | xmm2 +; AVX512-NEXT: vpternlogd $254, %xmm2, %xmm3, %xmm0 ; AVX512-NEXT: retq %t0 = add <4 x i32> %x0, %x1 @@ -77,7 +77,7 @@ define <4 x i32> @reassociate_xor_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> ; AVX512-LABEL: reassociate_xor_v4i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpternlogd {{.*#+}} xmm0 = xmm0 ^ xmm3 ^ xmm2 +; AVX512-NEXT: vpternlogd $150, %xmm2, %xmm3, %xmm0 ; AVX512-NEXT: retq %t0 = add <4 x i32> %x0, %x1 @@ -109,7 +109,7 @@ define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> ; AVX512-LABEL: reassociate_and_v8i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 & ymm3 & ymm2 +; AVX512-NEXT: vpternlogd $128, %ymm2, %ymm3, %ymm0 ; AVX512-NEXT: retq %t0 = add <8 x i32> %x0, %x1 @@ -139,7 +139,7 @@ define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> % ; AVX512-LABEL: 
reassociate_or_v8i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 | ymm3 | ymm2 +; AVX512-NEXT: vpternlogd $254, %ymm2, %ymm3, %ymm0 ; AVX512-NEXT: retq %t0 = add <8 x i32> %x0, %x1 @@ -169,7 +169,7 @@ define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> ; AVX512-LABEL: reassociate_xor_v8i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpternlogd {{.*#+}} ymm0 = ymm0 ^ ymm3 ^ ymm2 +; AVX512-NEXT: vpternlogd $150, %ymm2, %ymm3, %ymm0 ; AVX512-NEXT: retq %t0 = add <8 x i32> %x0, %x1 @@ -211,7 +211,7 @@ define <16 x i32> @reassociate_and_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x ; AVX512-LABEL: reassociate_and_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 & zmm3 & zmm2 +; AVX512-NEXT: vpternlogd $128, %zmm2, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <16 x i32> %x0, %x1 @@ -250,7 +250,7 @@ define <16 x i32> @reassociate_or_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i ; AVX512-LABEL: reassociate_or_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 | zmm3 | zmm2 +; AVX512-NEXT: vpternlogd $254, %zmm2, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <16 x i32> %x0, %x1 @@ -289,7 +289,7 @@ define <16 x i32> @reassociate_xor_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x ; AVX512-LABEL: reassociate_xor_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = zmm0 ^ zmm3 ^ zmm2 +; AVX512-NEXT: vpternlogd $150, %zmm2, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <16 x i32> %x0, %x1 diff --git a/llvm/test/CodeGen/X86/nofpclass.ll b/llvm/test/CodeGen/X86/nofpclass.ll deleted file mode 100644 index 55f0af904a38d..0000000000000 --- a/llvm/test/CodeGen/X86/nofpclass.ll +++ /dev/null @@ -1,25 +0,0 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2,-sse | FileCheck %s --check-prefix=NOSSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE - -@gf = global { float, float } zeroinitializer, align 8 - -define void @f(<2 x float> noundef nofpclass(nan inf) %e.coerce) { -; NOSSE-LABEL: f: -; NOSSE: # %bb.0: # %entry -; NOSSE-NEXT: flds {{[0-9]+}}(%rsp) -; NOSSE-NEXT: flds {{[0-9]+}}(%rsp) -; NOSSE-NEXT: movq gf@GOTPCREL(%rip), %rax -; NOSSE-NEXT: fstps 4(%rax) -; NOSSE-NEXT: fstps (%rax) -; NOSSE-NEXT: retq -; -; SSE-LABEL: f: -; SSE: # %bb.0: # %entry -; SSE-NEXT: movq gf@GOTPCREL(%rip), %rax -; SSE-NEXT: movlps %xmm0, (%rax) -; SSE-NEXT: retq -entry: - store <2 x float> %e.coerce, ptr @gf, align 8 - ret void -} diff --git a/llvm/test/CodeGen/X86/pr63108.ll b/llvm/test/CodeGen/X86/pr63108.ll index 4bbc1707e10c3..b552e3238470f 100644 --- a/llvm/test/CodeGen/X86/pr63108.ll +++ b/llvm/test/CodeGen/X86/pr63108.ll @@ -15,7 +15,7 @@ define i32 @PR63108() { ; SSE-NEXT: .LBB0_2: # %vector.body.preheader ; SSE-NEXT: pxor %xmm0, %xmm0 ; SSE-NEXT: movd {{.*#+}} xmm1 = [57339,0,0,0] -; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: .p2align 4 ; SSE-NEXT: .LBB0_3: # %vector.body ; SSE-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll index 9064d5ca8df4e..51d34ce5b3882 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll @@ -9,7 +9,7 @@ declare void @use(ptr, ptr) define void @test_alloca() sanitize_hwaddress { ; CHECK-LABEL: define void @test_alloca ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; CHECK-NEXT: [[TMP2:%.*]] = 
getelementptr i8, ptr [[TMP1]], i32 48 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-compat.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-compat.ll index aae2946cbb190..9e9ed50d35daf 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-compat.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-compat.ll @@ -11,7 +11,7 @@ declare void @use32(ptr) define void @test_alloca() sanitize_hwaddress { ; CHECK-LABEL: define void @test_alloca ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll index 9ef624c0b7f75..0ef09321e41ad 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll @@ -12,7 +12,7 @@ define void @test_alloca() sanitize_hwaddress { ; CHECK-LABEL: define void @test_alloca ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer() ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i64 [[TMP2]], 3 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/exception-lifetime.ll b/llvm/test/Instrumentation/HWAddressSanitizer/exception-lifetime.ll index 50ce490f297bc..9e9fceb5eb472 100644 --- 
a/llvm/test/Instrumentation/HWAddressSanitizer/exception-lifetime.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/exception-lifetime.ll @@ -18,7 +18,7 @@ define void @test() sanitize_hwaddress personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: define void @test ; CHECK-SAME: () #[[ATTR0:[0-9]+]] personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer() ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i64 [[TMP2]], 3 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll index 4e7c021bd7f97..1698592bafc62 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll @@ -88,7 +88,7 @@ define void @test_alloca() sanitize_hwaddress { ; CHECK-LABEL: define void @test_alloca ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer() ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i64 [[TMP2]], 3 @@ -134,7 +134,7 @@ define void @test_alloca() sanitize_hwaddress { ; NOIFUNC-TLS-HISTORY-LABEL: define void @test_alloca ; NOIFUNC-TLS-HISTORY-SAME: () #[[ATTR0]] { ; NOIFUNC-TLS-HISTORY-NEXT: entry: -; NOIFUNC-TLS-HISTORY-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer.p0() +; NOIFUNC-TLS-HISTORY-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer() ; NOIFUNC-TLS-HISTORY-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48 ; NOIFUNC-TLS-HISTORY-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; NOIFUNC-TLS-HISTORY-NEXT: [[TMP3:%.*]] = ashr i64 
[[TMP2]], 3 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll index 57d37ca1ef95a..62fd7a1671569 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll @@ -12,7 +12,7 @@ define dso_local noundef i1 @_Z6targetv() sanitize_hwaddress { ; CHECK-LABEL: define dso_local noundef i1 @_Z6targetv ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.thread.pointer() ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 48 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = ashr i64 [[TMP2]], 3 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope.ll b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope.ll index e30b51890e172..16e6cda59a616 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope.ll @@ -65,7 +65,7 @@ define dso_local i32 @standard_lifetime() local_unnamed_addr sanitize_hwaddress ; X86-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SCOPE-LABEL: @standard_lifetime( -; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -117,7 +117,7 @@ define dso_local i32 @standard_lifetime() local_unnamed_addr sanitize_hwaddress ; AARCH64-SCOPE-NEXT: ret i32 0 ; ; AARCH64-NOSCOPE-LABEL: @standard_lifetime( -; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-NOSCOPE-NEXT: 
[[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -167,7 +167,7 @@ define dso_local i32 @standard_lifetime() local_unnamed_addr sanitize_hwaddress ; AARCH64-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-SCOPE-LABEL: @standard_lifetime( -; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -222,7 +222,7 @@ define dso_local i32 @standard_lifetime() local_unnamed_addr sanitize_hwaddress ; AARCH64-SHORT-SCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-NOSCOPE-LABEL: @standard_lifetime( -; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -346,7 +346,7 @@ define dso_local i32 @standard_lifetime_optnone() local_unnamed_addr optnone noi ; X86-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SCOPE-LABEL: @standard_lifetime_optnone( -; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -398,7 +398,7 @@ define dso_local i32 @standard_lifetime_optnone() 
local_unnamed_addr optnone noi ; AARCH64-SCOPE-NEXT: ret i32 0 ; ; AARCH64-NOSCOPE-LABEL: @standard_lifetime_optnone( -; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -448,7 +448,7 @@ define dso_local i32 @standard_lifetime_optnone() local_unnamed_addr optnone noi ; AARCH64-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-SCOPE-LABEL: @standard_lifetime_optnone( -; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -503,7 +503,7 @@ define dso_local i32 @standard_lifetime_optnone() local_unnamed_addr optnone noi ; AARCH64-SHORT-SCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-NOSCOPE-LABEL: @standard_lifetime_optnone( -; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -617,7 +617,7 @@ define dso_local i32 @multiple_lifetimes() local_unnamed_addr sanitize_hwaddress ; X86-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SCOPE-LABEL: @multiple_lifetimes( -; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SCOPE-NEXT: [[TMP2:%.*]] = 
getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -663,7 +663,7 @@ define dso_local i32 @multiple_lifetimes() local_unnamed_addr sanitize_hwaddress ; AARCH64-SCOPE-NEXT: ret i32 0 ; ; AARCH64-NOSCOPE-LABEL: @multiple_lifetimes( -; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -709,7 +709,7 @@ define dso_local i32 @multiple_lifetimes() local_unnamed_addr sanitize_hwaddress ; AARCH64-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-SCOPE-LABEL: @multiple_lifetimes( -; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -758,7 +758,7 @@ define dso_local i32 @multiple_lifetimes() local_unnamed_addr sanitize_hwaddress ; AARCH64-SHORT-SCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-NOSCOPE-LABEL: @multiple_lifetimes( -; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -877,7 +877,7 @@ define dso_local i32 @unreachable_exit() local_unnamed_addr sanitize_hwaddress { ; X86-NOSCOPE-NEXT: ret i32 0 ; ; 
AARCH64-SCOPE-LABEL: @unreachable_exit( -; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -934,7 +934,7 @@ define dso_local i32 @unreachable_exit() local_unnamed_addr sanitize_hwaddress { ; AARCH64-SCOPE-NEXT: ret i32 0 ; ; AARCH64-NOSCOPE-LABEL: @unreachable_exit( -; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -990,7 +990,7 @@ define dso_local i32 @unreachable_exit() local_unnamed_addr sanitize_hwaddress { ; AARCH64-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-SCOPE-LABEL: @unreachable_exit( -; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -1050,7 +1050,7 @@ define dso_local i32 @unreachable_exit() local_unnamed_addr sanitize_hwaddress { ; AARCH64-SHORT-SCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-NOSCOPE-LABEL: @unreachable_exit( -; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; 
AARCH64-SHORT-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -1185,7 +1185,7 @@ define dso_local i32 @diamond_lifetime() local_unnamed_addr sanitize_hwaddress { ; X86-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SCOPE-LABEL: @diamond_lifetime( -; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -1246,7 +1246,7 @@ define dso_local i32 @diamond_lifetime() local_unnamed_addr sanitize_hwaddress { ; AARCH64-SCOPE-NEXT: ret i32 0 ; ; AARCH64-NOSCOPE-LABEL: @diamond_lifetime( -; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -1298,7 +1298,7 @@ define dso_local i32 @diamond_lifetime() local_unnamed_addr sanitize_hwaddress { ; AARCH64-NOSCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-SCOPE-LABEL: @diamond_lifetime( -; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-SCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-SCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-SCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 @@ -1362,7 +1362,7 @@ define dso_local i32 @diamond_lifetime() local_unnamed_addr sanitize_hwaddress { ; AARCH64-SHORT-SCOPE-NEXT: ret i32 0 ; ; AARCH64-SHORT-NOSCOPE-LABEL: @diamond_lifetime( -; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP1:%.*]] = call ptr @llvm.thread.pointer.p0() +; AARCH64-SHORT-NOSCOPE-NEXT: 
[[TMP1:%.*]] = call ptr @llvm.thread.pointer() ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; AARCH64-SHORT-NOSCOPE-NEXT: [[TMP4:%.*]] = ashr i64 [[TMP3]], 3 diff --git a/llvm/test/MC/RISCV/rv32q-invalid.s b/llvm/test/MC/RISCV/rv32q-invalid.s deleted file mode 100644 index 9b74f9f1d59c1..0000000000000 --- a/llvm/test/MC/RISCV/rv32q-invalid.s +++ /dev/null @@ -1,21 +0,0 @@ -# RUN: not llvm-mc -triple riscv32 -mattr=+q < %s 2>&1 | FileCheck %s - -# Out of range immediates -## simm12 -flq ft1, -2049(a0) # CHECK: :[[@LINE]]:10: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo specifier or an integer in the range [-2048, 2047] -fsq ft2, 2048(a1) # CHECK: :[[@LINE]]:10: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo specifier or an integer in the range [-2048, 2047] - -# Memory operand not formatted correctly -flq ft1, a0, -200 # CHECK: :[[@LINE]]:14: error: invalid operand for instruction -fsq ft2, a1, 100 # CHECK: :[[@LINE]]:14: error: invalid operand for instruction - -# Invalid register names -flq ft15, 100(a0) # CHECK: :[[@LINE]]:5: error: invalid operand for instruction -flq ft1, 100(a10) # CHECK: :[[@LINE]]:14: error: expected register -fsgnjn.q fa100, fa2, fa3 # CHECK: :[[@LINE]]:10: error: invalid operand for instruction - -# Integer registers where FP regs are expected -fadd.q a2, a1, a0 # CHECK: :[[@LINE]]:8: error: invalid operand for instruction - -# FP registers where integer regs are expected -fcvt.wu.q ft2, a1 # CHECK: :[[@LINE]]:11: error: invalid operand for instruction diff --git a/llvm/test/MC/RISCV/rv64q-invalid.s b/llvm/test/MC/RISCV/rv64q-invalid.s deleted file mode 100644 index ac469c268d7ad..0000000000000 --- a/llvm/test/MC/RISCV/rv64q-invalid.s +++ /dev/null @@ -1,9 +0,0 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+q < %s 2>&1 | FileCheck %s - -# Integer registers where FP regs are expected -fcvt.l.q 
ft0, a0 # CHECK: :[[@LINE]]:10: error: invalid operand for instruction -fcvt.lu.q ft1, a1 # CHECK: :[[@LINE]]:11: error: invalid operand for instruction - -# FP registers where integer regs are expected -fcvt.q.l a3, ft3 # CHECK: :[[@LINE]]:10: error: invalid operand for instruction -fcvt.q.lu a4, ft4 # CHECK: :[[@LINE]]:11: error: invalid operand for instruction diff --git a/llvm/test/MC/RISCV/rv64q-valid.s b/llvm/test/MC/RISCV/rv64q-valid.s deleted file mode 100644 index 81bb2852eac0f..0000000000000 --- a/llvm/test/MC/RISCV/rv64q-valid.s +++ /dev/null @@ -1,43 +0,0 @@ -# RUN: llvm-mc %s -triple=riscv64 -mattr=+q -M no-aliases -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+q < %s \ -# RUN: | llvm-objdump --mattr=+q -M no-aliases -d -r - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# -# RUN: not llvm-mc -triple riscv32 -mattr=+q < %s 2>&1 \ -# RUN: | FileCheck -check-prefix=CHECK-RV32 %s - -# CHECK-ASM-AND-OBJ: fcvt.l.q a0, ft0, dyn -# CHECK-ASM: encoding: [0x53,0x75,0x20,0xc6] -# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.l.q a0, ft0, dyn -# CHECK-ASM-AND-OBJ: fcvt.lu.q a1, ft1, dyn -# CHECK-ASM: encoding: [0xd3,0xf5,0x30,0xc6] -# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.lu.q a1, ft1, dyn -# CHECK-ASM-AND-OBJ: fcvt.q.l ft3, a3, dyn -# CHECK-ASM: encoding: [0xd3,0xf1,0x26,0xd6] -# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.q.l ft3, a3, dyn -# CHECK-ASM-AND-OBJ: fcvt.q.lu ft4, a4, dyn -# CHECK-ASM: encoding: [0x53,0x72,0x37,0xd6] -# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.q.lu ft4, a4, dyn - -# Rounding modes -# CHECK-ASM-AND-OBJ: fcvt.q.l ft3, a3 -# CHECK-ASM: encoding: [0xd3,0x81,0x26,0xd6] -# 
CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.q.l ft3, a3, rne -# CHECK-ASM-AND-OBJ: fcvt.q.lu ft4, a4, rtz -# CHECK-ASM: encoding: [0x53,0x12,0x37,0xd6] -# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.q.lu ft4, a4, rtz -# CHECK-ASM-AND-OBJ: fcvt.l.q a0, ft0, rdn -# CHECK-ASM: encoding: [0x53,0x25,0x20,0xc6] -# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.l.q a0, ft0, rdn -# CHECK-ASM-AND-OBJ: fcvt.lu.q a1, ft1, rup -# CHECK-ASM: encoding: [0xd3,0xb5,0x30,0xc6] -# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set{{$}} -fcvt.lu.q a1, ft1, rup diff --git a/llvm/test/MC/RISCV/rv64zfa-only-valid.s b/llvm/test/MC/RISCV/rv64zfa-only-valid.s deleted file mode 100644 index 95fb253b145c1..0000000000000 --- a/llvm/test/MC/RISCV/rv64zfa-only-valid.s +++ /dev/null @@ -1,19 +0,0 @@ -# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ -# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# -# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ -# RUN: -M no-aliases -show-encoding < %s 2>&1 \ -# RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s - -# CHECK-ASM-AND-OBJ: fmvh.x.q a1, fs1 -# CHECK-ASM: encoding: [0xd3,0x85,0x14,0xe6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fmvh.x.q a1, fs1 - -# CHECK-ASM-AND-OBJ: fmvp.q.x fs1, a1, a2 -# CHECK-ASM: encoding: [0xd3,0x84,0xc5,0xb6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fmvp.q.x fs1, a1, a2 diff --git a/llvm/test/MC/RISCV/rvq-aliases-valid.s 
b/llvm/test/MC/RISCV/rvq-aliases-valid.s deleted file mode 100644 index 85e24f0e970cb..0000000000000 --- a/llvm/test/MC/RISCV/rvq-aliases-valid.s +++ /dev/null @@ -1,55 +0,0 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+q -M no-aliases \ -# RUN: | FileCheck -check-prefix=CHECK-INST %s -# RUN: llvm-mc %s -triple=riscv32 -mattr=+q \ -# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+q -M no-aliases \ -# RUN: | FileCheck -check-prefix=CHECK-INST %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+q \ -# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s -# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+q < %s \ -# RUN: | llvm-objdump -d --mattr=+q --no-print-imm-hex -M no-aliases - \ -# RUN: | FileCheck -check-prefix=CHECK-INST %s -# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+q < %s \ -# RUN: | llvm-objdump -d --mattr=+q --no-print-imm-hex - \ -# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s -# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+q < %s \ -# RUN: | llvm-objdump -d --mattr=+q --no-print-imm-hex -M no-aliases - \ -# RUN: | FileCheck -check-prefix=CHECK-INST %s -# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+q < %s \ -# RUN: | llvm-objdump -d --mattr=+q --no-print-imm-hex - \ -# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s - -##===----------------------------------------------------------------------===## -## Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) -##===----------------------------------------------------------------------===## - -# CHECK-INST: flq ft0, 0(a0) -# CHECK-ALIAS: flq ft0, 0(a0) -flq f0, (a0) -# CHECK-INST: fsq ft0, 0(a0) -# CHECK-ALIAS: fsq ft0, 0(a0) -fsq f0, (a0) - -# CHECK-INST: fsgnj.q ft0, ft1, ft1 -# CHECK-ALIAS: fmv.q ft0, ft1 -fmv.q f0, f1 -# CHECK-INST: fsgnjx.q ft1, ft2, ft2 -# CHECK-ALIAS: fabs.q ft1, ft2 -fabs.q f1, f2 -# CHECK-INST: fsgnjn.q ft2, ft3, ft3 -# CHECK-ALIAS: fneg.q ft2, ft3 -fneg.q f2, f3 - -# CHECK-INST: flt.q tp, ft6, ft5 -# 
CHECK-ALIAS: flt.q tp, ft6, ft5 -fgt.q x4, f5, f6 -# CHECK-INST: fle.q t2, fs1, fs0 -# CHECK-ALIAS: fle.q t2, fs1, fs0 -fge.q x7, f8, f9 - -# CHECK-INST: flq ft0, 0(a0) -# CHECK-ALIAS: flq ft0, 0(a0) -flq f0, (x10) -# CHECK-INST: fsq ft0, 0(a0) -# CHECK-ALIAS: fsq ft0, 0(a0) -fsq f0, (x10) diff --git a/llvm/test/MC/RISCV/rvq-pseudos.s b/llvm/test/MC/RISCV/rvq-pseudos.s deleted file mode 100644 index 9c49a1bac3f15..0000000000000 --- a/llvm/test/MC/RISCV/rvq-pseudos.s +++ /dev/null @@ -1,12 +0,0 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+q | FileCheck %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+q | FileCheck %s - -# CHECK: .Lpcrel_hi0: -# CHECK: auipc a2, %pcrel_hi(a_symbol) -# CHECK: flq fa2, %pcrel_lo(.Lpcrel_hi0)(a2) -flq fa2, a_symbol, a2 - -# CHECK: .Lpcrel_hi1: -# CHECK: auipc a3, %pcrel_hi(a_symbol) -# CHECK: fsq fa2, %pcrel_lo(.Lpcrel_hi1)(a3) -fsq fa2, a_symbol, a3 diff --git a/llvm/test/MC/RISCV/rvq-valid.s b/llvm/test/MC/RISCV/rvq-valid.s deleted file mode 100644 index fe224f85cd699..0000000000000 --- a/llvm/test/MC/RISCV/rvq-valid.s +++ /dev/null @@ -1,184 +0,0 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+q -M no-aliases -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+q < %s \ -# RUN: | llvm-objdump --no-print-imm-hex --mattr=+q -M no-aliases -d -r - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+q -M no-aliases -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+q < %s \ -# RUN: | llvm-objdump --no-print-imm-hex --mattr=+q -M no-aliases -d -r - \ -# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s - -# Support for the 'Q' extension implies support for 'D' and 'F' - -# CHECK-ASM-AND-OBJ: fadd.d fs10, fs11, ft8, dyn -# CHECK-ASM: encoding: [0x53,0xfd,0xcd,0x03] -fadd.d f26, f27, f28, dyn - -# CHECK-ASM-AND-OBJ: fadd.s 
fs10, fs11, ft8 -# CHECK-ASM: encoding: [0x53,0xfd,0xcd,0x01] -fadd.s f26, f27, f28 - -# CHECK-ASM-AND-OBJ: flq ft0, 12(a0) -# CHECK-ASM: encoding: [0x07,0x40,0xc5,0x00] -flq f0, 12(a0) -# CHECK-ASM-AND-OBJ: flq ft1, 4(ra) -# CHECK-ASM: encoding: [0x87,0xc0,0x40,0x00] -flq f1, +4(ra) -# CHECK-ASM-AND-OBJ: flq ft2, -2048(a3) -# CHECK-ASM: encoding: [0x07,0xc1,0x06,0x80] -flq f2, -2048(x13) -# CHECK-ASM: flq ft3, %lo(2048)(s1) # encoding: [0x87,0xc1,0bAAAA0100,A] -# CHECK-OBJ: flq ft3, -2048(s1) -flq f3, %lo(2048)(s1) -# CHECK-ASM-AND-OBJ: flq ft4, 2047(s2) -# CHECK-ASM: encoding: [0x07,0x42,0xf9,0x7f] -flq f4, 2047(s2) -# CHECK-ASM-AND-OBJ: flq ft5, 0(s3) -# CHECK-ASM: encoding: [0x87,0xc2,0x09,0x00] -flq f5, 0(s3) - -# CHECK-ASM-AND-OBJ: fsq ft6, 2047(s4) -# CHECK-ASM: encoding: [0xa7,0x4f,0x6a,0x7e] -fsq f6, 2047(s4) -# CHECK-ASM-AND-OBJ: fsq ft7, -2048(s5) -# CHECK-ASM: encoding: [0x27,0xc0,0x7a,0x80] -fsq f7, -2048(s5) -# CHECK-ASM: fsq fs0, %lo(2048)(s6) # encoding: [0x27'A',0x40'A',0x8b'A',A] -# CHECK-OBJ: fsq fs0, -2048(s6) -fsq f8, %lo(2048)(s6) -# CHECK-ASM-AND-OBJ: fsq fs1, 999(s7) -# CHECK-ASM: encoding: [0xa7,0xc3,0x9b,0x3e] -fsq f9, 999(s7) - -# CHECK-ASM-AND-OBJ: fmadd.q fa0, fa1, fa2, fa3, dyn -# CHECK-ASM: encoding: [0x43,0xf5,0xc5,0x6e] -fmadd.q f10, f11, f12, f13, dyn -# CHECK-ASM-AND-OBJ: fmsub.q fa4, fa5, fa6, fa7, dyn -# CHECK-ASM: encoding: [0x47,0xf7,0x07,0x8f] -fmsub.q f14, f15, f16, f17, dyn -# CHECK-ASM-AND-OBJ: fnmsub.q fs2, fs3, fs4, fs5, dyn -# CHECK-ASM: encoding: [0x4b,0xf9,0x49,0xaf] -fnmsub.q f18, f19, f20, f21, dyn -# CHECK-ASM-AND-OBJ: fnmadd.q fs6, fs7, fs8, fs9, dyn -# CHECK-ASM: encoding: [0x4f,0xfb,0x8b,0xcf] -fnmadd.q f22, f23, f24, f25, dyn - -# CHECK-ASM-AND-OBJ: fadd.q fs10, fs11, ft8, dyn -# CHECK-ASM: encoding: [0x53,0xfd,0xcd,0x07] -fadd.q f26, f27, f28, dyn -# CHECK-ASM-AND-OBJ: fsub.q ft9, ft10, ft11, dyn -# CHECK-ASM: encoding: [0xd3,0x7e,0xff,0x0f] -fsub.q f29, f30, f31, dyn -# CHECK-ASM-AND-OBJ: fmul.q ft0, ft1, 
ft2, dyn -# CHECK-ASM: encoding: [0x53,0xf0,0x20,0x16] -fmul.q ft0, ft1, ft2, dyn -# CHECK-ASM-AND-OBJ: fdiv.q ft3, ft4, ft5, dyn -# CHECK-ASM: encoding: [0xd3,0x71,0x52,0x1e] -fdiv.q ft3, ft4, ft5, dyn -# CHECK-ASM-AND-OBJ: fsqrt.q ft6, ft7, dyn -# CHECK-ASM: encoding: [0x53,0xf3,0x03,0x5e] -fsqrt.q ft6, ft7, dyn -# CHECK-ASM-AND-OBJ: fsgnj.q fs1, fa0, fa1 -# CHECK-ASM: encoding: [0xd3,0x04,0xb5,0x26] -fsgnj.q fs1, fa0, fa1 -# CHECK-ASM-AND-OBJ: fsgnjn.q fa1, fa3, fa4 -# CHECK-ASM: encoding: [0xd3,0x95,0xe6,0x26] -fsgnjn.q fa1, fa3, fa4 -# CHECK-ASM-AND-OBJ: fsgnjx.q fa3, fa2, fa1 -# CHECK-ASM: encoding: [0xd3,0x26,0xb6,0x26] -fsgnjx.q fa3, fa2, fa1 -# CHECK-ASM-AND-OBJ: fmin.q fa5, fa6, fa7 -# CHECK-ASM: encoding: [0xd3,0x07,0x18,0x2f] -fmin.q fa5, fa6, fa7 -# CHECK-ASM-AND-OBJ: fmax.q fs2, fs3, fs4 -# CHECK-ASM: encoding: [0x53,0x99,0x49,0x2f] -fmax.q fs2, fs3, fs4 - -# CHECK-ASM-AND-OBJ: fcvt.s.q fs5, fs6, dyn -# CHECK-ASM: encoding: [0xd3,0x7a,0x3b,0x40] -fcvt.s.q fs5, fs6, dyn -# CHECK-ASM-AND-OBJ: fcvt.q.s fs7, fs8 -# CHECK-ASM: encoding: [0xd3,0x0b,0x0c,0x46] -fcvt.q.s fs7, fs8 -# CHECK-ASM-AND-OBJ: fcvt.q.s fs7, fs8, rup -# CHECK-ASM: encoding: [0xd3,0x3b,0x0c,0x46] -fcvt.q.s fs7, fs8, rup -# CHECK-ASM-AND-OBJ: fcvt.d.q fs5, fs6, dyn -# CHECK-ASM: encoding: [0xd3,0x7a,0x3b,0x42] -fcvt.d.q fs5, fs6, dyn -# CHECK-ASM-AND-OBJ: fcvt.q.d fs7, fs8 -# CHECK-ASM: encoding: [0xd3,0x0b,0x1c,0x46] -fcvt.q.d fs7, fs8 -# CHECK-ASM-AND-OBJ: fcvt.q.d fs7, fs8, rup -# CHECK-ASM: encoding: [0xd3,0x3b,0x1c,0x46] -fcvt.q.d fs7, fs8, rup -# CHECK-ASM-AND-OBJ: feq.q a1, fs8, fs9 -# CHECK-ASM: encoding: [0xd3,0x25,0x9c,0xa7] -feq.q a1, fs8, fs9 -# CHECK-ASM-AND-OBJ: flt.q a2, fs10, fs11 -# CHECK-ASM: encoding: [0x53,0x16,0xbd,0xa7] -flt.q a2, fs10, fs11 -# CHECK-ASM-AND-OBJ: fle.q a3, ft8, ft9 -# CHECK-ASM: encoding: [0xd3,0x06,0xde,0xa7] -fle.q a3, ft8, ft9 -# CHECK-ASM-AND-OBJ: fclass.q a3, ft10 -# CHECK-ASM: encoding: [0xd3,0x16,0x0f,0xe6] -fclass.q a3, ft10 - -# 
CHECK-ASM-AND-OBJ: fcvt.w.q a4, ft11, dyn -# CHECK-ASM: encoding: [0x53,0xf7,0x0f,0xc6] -fcvt.w.q a4, ft11, dyn -# CHECK-ASM-AND-OBJ: fcvt.q.w ft0, a5 -# CHECK-ASM: encoding: [0x53,0x80,0x07,0xd6] -fcvt.q.w ft0, a5 -# CHECK-ASM-AND-OBJ: fcvt.q.w ft0, a5, rup -# CHECK-ASM: encoding: [0x53,0xb0,0x07,0xd6] -fcvt.q.w ft0, a5, rup -# CHECK-ASM-AND-OBJ: fcvt.q.wu ft1, a6 -# CHECK-ASM: encoding: [0xd3,0x00,0x18,0xd6] -fcvt.q.wu ft1, a6 -# CHECK-ASM-AND-OBJ: fcvt.q.wu ft1, a6, rup -# CHECK-ASM: encoding: [0xd3,0x30,0x18,0xd6] -fcvt.q.wu ft1, a6, rup - -# Rounding modes - -# CHECK-ASM-AND-OBJ: fmadd.q fa0, fa1, fa2, fa3, rne -# CHECK-ASM: encoding: [0x43,0x85,0xc5,0x6e] -fmadd.q f10, f11, f12, f13, rne -# CHECK-ASM-AND-OBJ: fmsub.q fa4, fa5, fa6, fa7, rtz -# CHECK-ASM: encoding: [0x47,0x97,0x07,0x8f] -fmsub.q f14, f15, f16, f17, rtz -# CHECK-ASM-AND-OBJ: fnmsub.q fs2, fs3, fs4, fs5, rdn -# CHECK-ASM: encoding: [0x4b,0xa9,0x49,0xaf] -fnmsub.q f18, f19, f20, f21, rdn -# CHECK-ASM-AND-OBJ: fnmadd.q fs6, fs7, fs8, fs9, rup -# CHECK-ASM: encoding: [0x4f,0xbb,0x8b,0xcf] -fnmadd.q f22, f23, f24, f25, rup - -# CHECK-ASM-AND-OBJ: fadd.q fs10, fs11, ft8, rmm -# CHECK-ASM: encoding: [0x53,0xcd,0xcd,0x07] -fadd.q f26, f27, f28, rmm -# CHECK-ASM-AND-OBJ: fsub.q ft9, ft10, ft11 -# CHECK-ASM: encoding: [0xd3,0x7e,0xff,0x0f] -fsub.q f29, f30, f31, dyn -# CHECK-ASM-AND-OBJ: fmul.q ft0, ft1, ft2, rne -# CHECK-ASM: encoding: [0x53,0x80,0x20,0x16] -fmul.q ft0, ft1, ft2, rne -# CHECK-ASM-AND-OBJ: fdiv.q ft3, ft4, ft5, rtz -# CHECK-ASM: encoding: [0xd3,0x11,0x52,0x1e] -fdiv.q ft3, ft4, ft5, rtz - -# CHECK-ASM-AND-OBJ: fsqrt.q ft6, ft7, rdn -# CHECK-ASM: encoding: [0x53,0xa3,0x03,0x5e] -fsqrt.q ft6, ft7, rdn -# CHECK-ASM-AND-OBJ: fcvt.s.q fs5, fs6, rup -# CHECK-ASM: encoding: [0xd3,0x3a,0x3b,0x40] -fcvt.s.q fs5, fs6, rup -# CHECK-ASM-AND-OBJ: fcvt.w.q a4, ft11, rmm -# CHECK-ASM: encoding: [0x53,0xc7,0x0f,0xc6] -fcvt.w.q a4, ft11, rmm -# CHECK-ASM-AND-OBJ: fcvt.wu.q a5, ft10, dyn -# CHECK-ASM: 
encoding: [0xd3,0x77,0x1f,0xc6] -fcvt.wu.q a5, ft10, dyn diff --git a/llvm/test/MC/RISCV/xandesvdot-valid.s b/llvm/test/MC/RISCV/xandesvdot-valid.s deleted file mode 100644 index 06433790219de..0000000000000 --- a/llvm/test/MC/RISCV/xandesvdot-valid.s +++ /dev/null @@ -1,51 +0,0 @@ -# XAndesVDot - Andes Vector Dot Product Extension -# RUN: llvm-mc %s -triple=riscv32 -mattr=+xandesvdot -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ASM %s -# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+xandesvdot < %s \ -# RUN: | llvm-objdump --mattr=+xandesvdot -M no-aliases -d -r - \ -# RUN: | FileCheck -check-prefixes=CHECK-OBJ %s -# RUN: not llvm-mc -triple=riscv32 -show-encoding %s 2>&1 \ -# RUN: | FileCheck %s --check-prefix=CHECK-ERROR -# RUN: llvm-mc %s -triple=riscv64 -mattr=+xandesvdot -show-encoding \ -# RUN: | FileCheck -check-prefixes=CHECK-ASM %s -# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+xandesvdot < %s \ -# RUN: | llvm-objdump --mattr=+xandesvdot -M no-aliases -d -r - \ -# RUN: | FileCheck -check-prefixes=CHECK-OBJ %s -# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \ -# RUN: | FileCheck %s --check-prefix=CHECK-ERROR - -# CHECK-OBJ: nds.vd4dots.vv v8, v10, v12 -# CHECK-ASM: nds.vd4dots.vv v8, v10, v12 -# CHECK-ASM: encoding: [0x5b,0x44,0xc5,0x12] -# CHECK-ERROR: instruction requires the following: 'XAndesVDot' (Andes Vector Dot Product Extension){{$}} -nds.vd4dots.vv v8, v10, v12 - -# CHECK-OBJ: nds.vd4dots.vv v8, v10, v12, v0.t -# CHECK-ASM: nds.vd4dots.vv v8, v10, v12, v0.t -# CHECK-ASM: encoding: [0x5b,0x44,0xc5,0x10] -# CHECK-ERROR: instruction requires the following: 'XAndesVDot' (Andes Vector Dot Product Extension){{$}} -nds.vd4dots.vv v8, v10, v12, v0.t - -# CHECK-OBJ: nds.vd4dotu.vv v8, v10, v12 -# CHECK-ASM: nds.vd4dotu.vv v8, v10, v12 -# CHECK-ASM: encoding: [0x5b,0x44,0xc5,0x1e] -# CHECK-ERROR: instruction requires the following: 'XAndesVDot' (Andes Vector Dot Product Extension){{$}} -nds.vd4dotu.vv v8, v10, v12 - -# 
CHECK-OBJ: nds.vd4dotu.vv v8, v10, v12, v0.t -# CHECK-ASM: nds.vd4dotu.vv v8, v10, v12, v0.t -# CHECK-ASM: encoding: [0x5b,0x44,0xc5,0x1c] -# CHECK-ERROR: instruction requires the following: 'XAndesVDot' (Andes Vector Dot Product Extension){{$}} -nds.vd4dotu.vv v8, v10, v12, v0.t - -# CHECK-OBJ: nds.vd4dotsu.vv v8, v10, v12 -# CHECK-ASM: nds.vd4dotsu.vv v8, v10, v12 -# CHECK-ASM: encoding: [0x5b,0x44,0xc5,0x16] -# CHECK-ERROR: instruction requires the following: 'XAndesVDot' (Andes Vector Dot Product Extension){{$}} -nds.vd4dotsu.vv v8, v10, v12 - -# CHECK-OBJ: nds.vd4dotsu.vv v8, v10, v12, v0.t -# CHECK-ASM: nds.vd4dotsu.vv v8, v10, v12, v0.t -# CHECK-ASM: encoding: [0x5b,0x44,0xc5,0x14] -# CHECK-ERROR: instruction requires the following: 'XAndesVDot' (Andes Vector Dot Product Extension){{$}} -nds.vd4dotsu.vv v8, v10, v12, v0.t diff --git a/llvm/test/MC/RISCV/zfa-invalid.s b/llvm/test/MC/RISCV/zfa-invalid.s index cedc9279db3cb..c2537c3fc5102 100644 --- a/llvm/test/MC/RISCV/zfa-invalid.s +++ b/llvm/test/MC/RISCV/zfa-invalid.s @@ -1,5 +1,5 @@ -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+q,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s +# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV32 %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+d,+zfh < %s 2>&1 | FileCheck -check-prefixes=CHECK-NO-RV64 %s # Invalid rounding modes # CHECK-NO-RV64: error: operand must be 'rtz' floating-point rounding mode @@ -35,10 +35,6 @@ fli.d ft1, 3.560000e+02 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.600000e+00 -# CHECK-NO-RV64: error: operand must be a valid floating-point constant -# CHECK-NO-RV32: error: operand must be a valid floating-point constant -fli.q ft1, 2.250000e+00 - # CHECK-NO-RV64: error: invalid floating point immediate # CHECK-NO-RV32: error: 
invalid floating point immediate fli.s ft1, -min @@ -76,11 +72,6 @@ fli.d ft1, 1.1754943508222875079687365372222456778186655567720875215087517062784 # CHECK-NO-RV32: error: operand must be a valid floating-point constant fli.h ft1, 1.1754943508222875079687365372222456778186655567720875215087517062784172594547271728515625e-38 -# Don't accept single precision minimum for quad. -# CHECK-NO-RV64: error: operand must be a valid floating-point constant -# CHECK-NO-RV32: error: operand must be a valid floating-point constant -fli.q ft1, 1.1754943508222875079687365372222456778186655567720875215087517062784172594547271728515625e-38 - # Don't accept integers. # CHECK-NO-RV32: error: invalid floating point immediate # CHECK-NO-RV64: error: invalid floating point immediate diff --git a/llvm/test/MC/RISCV/zfa-quad-invalid.s b/llvm/test/MC/RISCV/zfa-quad-invalid.s deleted file mode 100644 index 3ca89c6ebe627..0000000000000 --- a/llvm/test/MC/RISCV/zfa-quad-invalid.s +++ /dev/null @@ -1,42 +0,0 @@ -# RUN: not llvm-mc -triple riscv32 -mattr=+zfa,+zfh \ -# RUN: -M no-aliases -show-encoding < %s 2>&1 \ -# RUN: | FileCheck -check-prefixes=CHECK-NO-EXTQ %s -# RUN: not llvm-mc -triple riscv64 -mattr=+zfa,+zfh \ -# RUN: -M no-aliases -show-encoding < %s 2>&1 \ -# RUN: | FileCheck -check-prefixes=CHECK-NO-EXTQ %s - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -fminm.q fa0, fa1, fa2 - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -fmaxm.q fs3, fs4, fs5 - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -fround.q fs1, fs2 - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -fround.q fs1, fs2, dyn - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -fround.q fs1, fs2, rtz - -# CHECK-NO-EXTQ: error: instruction requires the 
following: 'Q' (Quad-Precision Floating-Point){{$}} -fround.q fs1, fs2, rne - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -froundnx.q fs1, fs2 - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -froundnx.q fs1, fs2, dyn - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -froundnx.q fs1, fs2, rtz - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -froundnx.q fs1, fs2, rne - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -fltq.q a1, fs1, fs2 - -# CHECK-NO-EXTQ: error: instruction requires the following: 'Q' (Quad-Precision Floating-Point){{$}} -fleq.q a1, ft1, ft2 diff --git a/llvm/test/MC/RISCV/zfa-valid.s b/llvm/test/MC/RISCV/zfa-valid.s index edf830642c263..6e78a4c0f2584 100644 --- a/llvm/test/MC/RISCV/zfa-valid.s +++ b/llvm/test/MC/RISCV/zfa-valid.s @@ -1,18 +1,18 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zfa,+d,+zfh -M no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+q,+zfh -M no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zfa,+d,+zfh -M no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zfa,+q,+zfh < %s \ -# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zfa,+d,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+d,+zfh -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+q,+zfh < %s \ -# RUN: | llvm-objdump --mattr=+zfa,+q,+zfh -M no-aliases -d 
-r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zfa,+d,+zfh < %s \ +# RUN: | llvm-objdump --mattr=+zfa,+d,+zfh -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # -# RUN: not llvm-mc -triple riscv32 -mattr=+q,+zfh \ +# RUN: not llvm-mc -triple riscv32 -mattr=+d,+zfh \ # RUN: -M no-aliases -show-encoding < %s 2>&1 \ # RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s -# RUN: not llvm-mc -triple riscv64 -mattr=+q,+zfh \ +# RUN: not llvm-mc -triple riscv64 -mattr=+d,+zfh \ # RUN: -M no-aliases -show-encoding < %s 2>&1 \ # RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s @@ -933,311 +933,6 @@ fli.h ft1, INF # CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} fli.h ft1, nan -# CHECK-ASM-AND-OBJ: fli.q ft1, -1.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x10,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, -1.000000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, -1.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x10,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, -0x1p+0 - -# CHECK-ASM-AND-OBJ: fli.q ft1, min -# CHECK-ASM: encoding: [0xd3,0x80,0x10,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, min - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.52587890625e-05 -# CHECK-ASM: encoding: [0xd3,0x00,0x11,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.52587890625e-05 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.52587890625e-05 -# CHECK-ASM: encoding: [0xd3,0x00,0x11,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p-16 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 3.0517578125e-05 -# CHECK-ASM: encoding: [0xd3,0x80,0x11,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' 
(Additional Floating-Point){{$}} -fli.q ft1, 3.0517578125e-05 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 3.0517578125e-05 -# CHECK-ASM: encoding: [0xd3,0x80,0x11,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p-15 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.00390625 -# CHECK-ASM: encoding: [0xd3,0x00,0x12,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 3.906250e-03 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.00390625 -# CHECK-ASM: encoding: [0xd3,0x00,0x12,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p-8 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.0078125 -# CHECK-ASM: encoding: [0xd3,0x80,0x12,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 7.812500e-03 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.0078125 -# CHECK-ASM: encoding: [0xd3,0x80,0x12,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p-7 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.0625 -# CHECK-ASM: encoding: [0xd3,0x00,0x13,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 6.250000e-02 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.0625 -# CHECK-ASM: encoding: [0xd3,0x00,0x13,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p-4 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.125 -# CHECK-ASM: encoding: [0xd3,0x80,0x13,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.250000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.125 -# CHECK-ASM: encoding: [0xd3,0x80,0x13,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p-3 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 
0.25 -# CHECK-ASM: encoding: [0xd3,0x00,0x14,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 2.500000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.25 -# CHECK-ASM: encoding: [0xd3,0x00,0x14,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p-2 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.3125 -# CHECK-ASM: encoding: [0xd3,0x80,0x14,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 3.125000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.3125 -# CHECK-ASM: encoding: [0xd3,0x80,0x14,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.4p-2 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.375 -# CHECK-ASM: encoding: [0xd3,0x00,0x15,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 3.750000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.375 -# CHECK-ASM: encoding: [0xd3,0x00,0x15,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.8p-2 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.4375 -# CHECK-ASM: encoding: [0xd3,0x80,0x15,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 4.375000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.4375 -# CHECK-ASM: encoding: [0xd3,0x80,0x15,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.cp-2 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.5 -# CHECK-ASM: encoding: [0xd3,0x00,0x16,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 5.000000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.5 -# CHECK-ASM: encoding: [0xd3,0x00,0x16,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' 
(Additional Floating-Point){{$}} -fli.q ft1, 0x1p-1 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.625 -# CHECK-ASM: encoding: [0xd3,0x80,0x16,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 6.250000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.625 -# CHECK-ASM: encoding: [0xd3,0x80,0x16,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.4p-1 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.75 -# CHECK-ASM: encoding: [0xd3,0x00,0x17,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 7.500000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.75 -# CHECK-ASM: encoding: [0xd3,0x00,0x17,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.8p-1 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.875 -# CHECK-ASM: encoding: [0xd3,0x80,0x17,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 8.750000e-01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 0.875 -# CHECK-ASM: encoding: [0xd3,0x80,0x17,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.cp-1 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x18,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.000000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x18,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+0 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.25 -# CHECK-ASM: encoding: [0xd3,0x80,0x18,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.250000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.25 -# CHECK-ASM: encoding: 
[0xd3,0x80,0x18,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.4p+0 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.5 -# CHECK-ASM: encoding: [0xd3,0x00,0x19,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.500000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.5 -# CHECK-ASM: encoding: [0xd3,0x00,0x19,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.8p+0 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.75 -# CHECK-ASM: encoding: [0xd3,0x80,0x19,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.750000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 1.75 -# CHECK-ASM: encoding: [0xd3,0x80,0x19,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.cp+0 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 2.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1a,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 2.000000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 2.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1a,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+1 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 2.5 -# CHECK-ASM: encoding: [0xd3,0x80,0x1a,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 2.500000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 2.5 -# CHECK-ASM: encoding: [0xd3,0x80,0x1a,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.4p+1 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 3.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1b,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 
3.000000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 3.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1b,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1.8p+1 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 4.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1b,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 4.000000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 4.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1b,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+2 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 8.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1c,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 8.000000e+00 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 8.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1c,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+3 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 16.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1c,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.600000e+01 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 16.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1c,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+4 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 128.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1d,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 1.280000e+02 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 128.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1d,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+7 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 256.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1d,0xf6] -# CHECK-NO-EXT: error: instruction 
requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 2.560000e+02 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 256.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1d,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+8 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 32768.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1e,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 3.276800e+04 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 32768.0 -# CHECK-ASM: encoding: [0xd3,0x00,0x1e,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+15 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 65536.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1e,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 6.553600e+04 - -# CHECK-ASM-AND-OBJ: fli.q ft1, 65536.0 -# CHECK-ASM: encoding: [0xd3,0x80,0x1e,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, 0x1p+16 - -# CHECK-ASM-AND-OBJ: fli.q ft1, inf -# CHECK-ASM: encoding: [0xd3,0x00,0x1f,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, INF - -# CHECK-ASM-AND-OBJ: fli.q ft1, nan -# CHECK-ASM: encoding: [0xd3,0x80,0x1f,0xf6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fli.q ft1, nan - # CHECK-ASM-AND-OBJ: fminm.s fa0, fa1, fa2 # CHECK-ASM: encoding: [0x53,0xa5,0xc5,0x28] # CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} @@ -1268,16 +963,6 @@ fminm.h fa0, fa1, fa2 # CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} fmaxm.h fs3, fs4, fs5 -# CHECK-ASM-AND-OBJ: fminm.q fa0, fa1, fa2 -# CHECK-ASM: encoding: [0x53,0xa5,0xc5,0x2e] -# CHECK-NO-EXT: 
error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fminm.q fa0, fa1, fa2 - -# CHECK-ASM-AND-OBJ: fmaxm.q fs3, fs4, fs5 -# CHECK-ASM: encoding: [0xd3,0x39,0x5a,0x2f] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fmaxm.q fs3, fs4, fs5 - # CHECK-ASM-AND-OBJ: fround.s fs1, fs2, dyn # CHECK-ASM: encoding: [0xd3,0x74,0x49,0x40] # CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} @@ -1398,46 +1083,6 @@ froundnx.h ft1, fa1, rtz # CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} froundnx.h fs1, fs2, rne -# CHECK-ASM-AND-OBJ: fround.q fs1, fs2, dyn -# CHECK-ASM: encoding: [0xd3,0x74,0x49,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fround.q fs1, fs2 - -# CHECK-ASM-AND-OBJ: fround.q fs1, fs2, dyn -# CHECK-ASM: encoding: [0xd3,0x74,0x49,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fround.q fs1, fs2, dyn - -# CHECK-ASM-AND-OBJ: fround.q fs1, fs2, rtz -# CHECK-ASM: encoding: [0xd3,0x14,0x49,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fround.q fs1, fs2, rtz - -# CHECK-ASM-AND-OBJ: fround.q fs1, fs2, rne -# CHECK-ASM: encoding: [0xd3,0x04,0x49,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fround.q fs1, fs2, rne - -# CHECK-ASM-AND-OBJ: froundnx.q fs1, fs2, dyn -# CHECK-ASM: encoding: [0xd3,0x74,0x59,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -froundnx.q fs1, fs2 - -# CHECK-ASM-AND-OBJ: froundnx.q fs1, fs2, dyn -# CHECK-ASM: encoding: [0xd3,0x74,0x59,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -froundnx.q fs1, fs2, dyn - -# CHECK-ASM-AND-OBJ: 
froundnx.q fs1, fs2, rtz -# CHECK-ASM: encoding: [0xd3,0x14,0x59,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -froundnx.q fs1, fs2, rtz - -# CHECK-ASM-AND-OBJ: froundnx.q fs1, fs2, rne -# CHECK-ASM: encoding: [0xd3,0x04,0x59,0x46] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -froundnx.q fs1, fs2, rne - # CHECK-ASM-AND-OBJ: fcvtmod.w.d a1, ft1, rtz # CHECK-ASM: encoding: [0xd3,0x95,0x80,0xc2] # CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} @@ -1502,23 +1147,3 @@ fgtq.h a1, fs1, fs2 # CHECK-ASM: encoding: [0xd3,0x45,0x11,0xa4] # CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} fgeq.h a1, ft1, ft2 - -# CHECK-ASM-AND-OBJ: fltq.q a1, fs1, fs2 -# CHECK-ASM: encoding: [0xd3,0xd5,0x24,0xa7] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fltq.q a1, fs1, fs2 - -# CHECK-ASM-AND-OBJ: fleq.q a1, ft1, ft2 -# CHECK-ASM: encoding: [0xd3,0xc5,0x20,0xa6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fleq.q a1, ft1, ft2 - -# CHECK-ASM-AND-OBJ: fltq.q a1, fs2, fs1 -# CHECK-ASM: encoding: [0xd3,0x55,0x99,0xa6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fgtq.q a1, fs1, fs2 - -# CHECK-ASM-AND-OBJ: fleq.q a1, ft2, ft1 -# CHECK-ASM: encoding: [0xd3,0x45,0x11,0xa6] -# CHECK-NO-EXT: error: instruction requires the following: 'Zfa' (Additional Floating-Point){{$}} -fgeq.q a1, ft1, ft2 diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 8270de5eb2132..2f877029c8396 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -84,14 +84,6 @@ def TDL_DirA : Directive<"dira"> { // CHECK-EMPTY: // CHECK-NEXT: static constexpr std::size_t Category_enumSize = 6; // 
CHECK-EMPTY: -// CHECK-NEXT: enum class SourceLanguage : uint32_t { -// CHECK-NEXT: C = 1U, -// CHECK-NEXT: Fortran = 2U, -// CHECK-NEXT: LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Fortran) -// CHECK-NEXT: }; -// CHECK-EMPTY: -// CHECK-NEXT: static constexpr std::size_t SourceLanguage_enumSize = 2; -// CHECK-EMPTY: // CHECK-NEXT: enum class Directive { // CHECK-NEXT: TDLD_dira, // CHECK-NEXT: }; @@ -137,7 +129,6 @@ def TDL_DirA : Directive<"dira"> { // CHECK-NEXT: constexpr std::size_t getMaxLeafCount() { return 0; } // CHECK-NEXT: LLVM_ABI Association getDirectiveAssociation(Directive D); // CHECK-NEXT: LLVM_ABI Category getDirectiveCategory(Directive D); -// CHECK-NEXT: LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D); // CHECK-NEXT: LLVM_ABI AKind getAKind(StringRef); // CHECK-NEXT: LLVM_ABI llvm::StringRef getTdlAKindName(AKind); // CHECK-EMPTY: @@ -399,14 +390,6 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: llvm_unreachable("Unexpected directive"); // IMPL-NEXT: } // IMPL-EMPTY: -// IMPL-NEXT: llvm::tdl::SourceLanguage llvm::tdl::getDirectiveLanguages(llvm::tdl::Directive D) { -// IMPL-NEXT: switch (D) { -// IMPL-NEXT: case llvm::tdl::TDLD_dira: -// IMPL-NEXT: return llvm::tdl::SourceLanguage::C | llvm::tdl::SourceLanguage::Fortran; -// IMPL-NEXT: } // switch(D) -// IMPL-NEXT: llvm_unreachable("Unexpected directive"); -// IMPL-NEXT: } -// IMPL-EMPTY: // IMPL-NEXT: static_assert(sizeof(llvm::tdl::Directive) == sizeof(int)); // IMPL-NEXT: {{.*}} static const llvm::tdl::Directive LeafConstructTable[][2] = { // IMPL-NEXT: {llvm::tdl::TDLD_dira, static_cast(0),}, diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index 58740cb8e1d96..3f1a44cfdd4f9 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -75,14 +75,6 @@ def TDL_DirA : Directive<"dira"> { // CHECK-EMPTY: // CHECK-NEXT: static constexpr std::size_t Category_enumSize = 6; // CHECK-EMPTY: -// CHECK-NEXT: enum class 
SourceLanguage : uint32_t { -// CHECK-NEXT: C = 1U, -// CHECK-NEXT: Fortran = 2U, -// CHECK-NEXT: LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Fortran) -// CHECK-NEXT: }; -// CHECK-EMPTY: -// CHECK-NEXT: static constexpr std::size_t SourceLanguage_enumSize = 2; -// CHECK-EMPTY: // CHECK-NEXT: enum class Directive { // CHECK-NEXT: TDLD_dira, // CHECK-NEXT: }; @@ -113,7 +105,6 @@ def TDL_DirA : Directive<"dira"> { // CHECK-NEXT: constexpr std::size_t getMaxLeafCount() { return 0; } // CHECK-NEXT: LLVM_ABI Association getDirectiveAssociation(Directive D); // CHECK-NEXT: LLVM_ABI Category getDirectiveCategory(Directive D); -// CHECK-NEXT: LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D); // CHECK-NEXT: } // namespace tdl // CHECK-NEXT: } // namespace llvm // CHECK-NEXT: #endif // LLVM_Tdl_INC @@ -330,14 +321,6 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: llvm_unreachable("Unexpected directive"); // IMPL-NEXT: } // IMPL-EMPTY: -// IMPL-NEXT: llvm::tdl::SourceLanguage llvm::tdl::getDirectiveLanguages(llvm::tdl::Directive D) { -// IMPL-NEXT: switch (D) { -// IMPL-NEXT: case llvm::tdl::TDLD_dira: -// IMPL-NEXT: return llvm::tdl::SourceLanguage::C | llvm::tdl::SourceLanguage::Fortran; -// IMPL-NEXT: } // switch(D) -// IMPL-NEXT: llvm_unreachable("Unexpected directive"); -// IMPL-NEXT: } -// IMPL-EMPTY: // IMPL-NEXT: static_assert(sizeof(llvm::tdl::Directive) == sizeof(int)); // IMPL-NEXT: {{.*}} static const llvm::tdl::Directive LeafConstructTable[][2] = { // IMPL-NEXT: {llvm::tdl::TDLD_dira, static_cast(0),}, diff --git a/llvm/test/ThinLTO/X86/cache-emit-asm.ll b/llvm/test/ThinLTO/X86/cache-emit-asm.ll deleted file mode 100644 index b6e5ca25a637d..0000000000000 --- a/llvm/test/ThinLTO/X86/cache-emit-asm.ll +++ /dev/null @@ -1,15 +0,0 @@ -;; This test runs thin LTO with cache only to look for memory errors, either -;; as crashes or sanitizer errors. 
MCAsmStreamer has specific assumptions about -;; the lifetime of the output stream that are easy to overlook (see #138194). - -; RUN: rm -rf %t && mkdir -p %t -; RUN: opt -module-hash -module-summary -thinlto-bc %s -o %t1.bc -; RUN: llvm-lto2 run -cache-dir %t/cache --filetype=asm -o %t.o %t1.bc -r=%t1.bc,globalfunc - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @globalfunc() { -entry: - ret void -} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll deleted file mode 100644 index 019f311406550..0000000000000 --- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll +++ /dev/null @@ -1,44 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -p 'require,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" -target triple = "x86_64-grtev4-linux-gnu" - -declare void @g(ptr) - -; %load and %load5 use the same address, %load5 is optimized first, %load is -; optimized later and reuse the same address computation instruction. We must -; make sure not to generate use before def error. 
- -define void @f(ptr %arg) { -; CHECK-LABEL: define void @f( -; CHECK-SAME: ptr [[ARG:%.*]]) { -; CHECK-NEXT: [[BB:.*:]] -; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 -; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]]) -; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 -; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56 -; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8 -; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0) -; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 -; CHECK-NEXT: ret void -; -bb: - %getelementptr = getelementptr i8, ptr %arg, i64 -64 - %getelementptr1 = getelementptr i8, ptr %arg, i64 -56 - call void @g(ptr %getelementptr) - br label %bb3 - -bb3: - %load = load ptr, ptr %getelementptr, align 8 - %load4 = load i32, ptr %getelementptr1, align 8 - %load5 = load ptr, ptr %getelementptr, align 8 - %add = add i32 1, 0 - %icmp = icmp eq i32 %add, 0 - br i1 %icmp, label %bb7, label %bb7 - -bb7: - ret void -} diff --git a/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll b/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll index deefe05ddb317..8bd0b4100cff9 100644 --- a/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll +++ b/llvm/test/Transforms/ConstraintElimination/and-implied-by-operands.ll @@ -6,7 +6,8 @@ define i1 @test_second_and_condition_implied_by_first(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C_1]], true +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false 
; CHECK: else: @@ -30,7 +31,8 @@ define i1 @test_first_and_condition_implied_by_second_ops(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 true, [[C_1]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -54,7 +56,8 @@ define i1 @test_second_and_condition_implied_by_first_select_form(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C_1]], i1 true, i1 false +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -102,7 +105,8 @@ define i1 @test_same_cond_for_and(i8 %x) { ; CHECK-LABEL: @test_same_cond_for_and( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 true, [[C_1]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -148,7 +152,8 @@ define i1 @test_second_and_condition_not_implied_by_first(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 true, [[C_1]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -366,7 +371,8 @@ define i1 @test_and_used_in_false_branch(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = 
icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 [[C_1]], true +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 true ; CHECK: else: @@ -391,7 +397,8 @@ define i1 @test_or_used_in_false_branch(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ule i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = or i1 [[C_1]], false +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 [[T_1]] ; CHECK: else: @@ -416,7 +423,8 @@ define i1 @test_or_used_in_false_branch2(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[T_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = or i1 false, [[T_1]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 [[T_1]] ; CHECK: else: @@ -442,7 +450,8 @@ define i1 @and_select_first_implies_second_may_be_poison(ptr noundef %A, ptr nou ; CHECK-NEXT: [[C_1:%.*]] = icmp ne ptr [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds ptr, ptr [[B]], i64 -1 ; CHECK-NEXT: [[C_2:%.*]] = icmp ugt ptr [[GEP]], [[A]] -; CHECK-NEXT: ret i1 [[C_2]] +; CHECK-NEXT: [[AND:%.*]] = select i1 [[C_2]], i1 true, i1 false +; CHECK-NEXT: ret i1 [[AND]] ; entry: %c.1 = icmp ne ptr %A, %B @@ -495,7 +504,8 @@ define void @and_tree_second_implies_first(i32 noundef %v0, i32 noundef %v1, i32 ; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V1]], [[V2:%.*]] ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[CMP0]], [[CMP1]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[V0]], [[V2]] -; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i1 false, [[AND1]] 
+; CHECK-NEXT: br i1 [[AND2]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @side_effect() ; CHECK-NEXT: br label [[RETURN]] @@ -525,7 +535,8 @@ define void @and_tree_second_implies_first_perm1(i32 noundef %v0, i32 noundef %v ; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V1]], [[V2:%.*]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[V0]], [[V2]] ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[CMP2]], [[CMP1]] -; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i1 false, [[AND1]] +; CHECK-NEXT: br i1 [[AND2]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @side_effect() ; CHECK-NEXT: br label [[RETURN]] @@ -556,7 +567,8 @@ define void @and_tree_second_implies_first_perm2(i32 noundef %v0, i32 noundef %v ; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V1]], [[V2:%.*]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[V0]], [[V2]] ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[CMP0]], [[CMP2]] -; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i1 false, [[AND1]] +; CHECK-NEXT: br i1 [[AND2]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @side_effect() ; CHECK-NEXT: br label [[RETURN]] @@ -617,7 +629,8 @@ define void @or_tree_second_implies_first(i32 noundef %v0, i32 noundef %v1, i32 ; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V1]], [[V2:%.*]] ; CHECK-NEXT: [[AND1:%.*]] = or i1 [[CMP0]], [[CMP1]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[V0]], [[V2]] -; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = or i1 true, [[AND1]] +; CHECK-NEXT: br i1 [[AND2]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @side_effect() ; CHECK-NEXT: br label [[RETURN]] @@ -646,7 +659,8 @@ define void @or_tree_second_implies_first_with_unknown_cond(i64 %x, i1 %cond) { ; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 
[[X:%.*]], 1 ; CHECK-NEXT: [[OR1:%.*]] = select i1 [[CMP1]], i1 [[COND:%.*]], i1 false ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i64 [[X]], 2 -; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = select i1 [[OR1]], i1 false, i1 false +; CHECK-NEXT: br i1 [[OR2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @side_effect() ; CHECK-NEXT: br label [[IF_END]] diff --git a/llvm/test/Transforms/ConstraintElimination/eq.ll b/llvm/test/Transforms/ConstraintElimination/eq.ll index 511a08f7796a3..04cd39490cdef 100644 --- a/llvm/test/Transforms/ConstraintElimination/eq.ll +++ b/llvm/test/Transforms/ConstraintElimination/eq.ll @@ -432,7 +432,8 @@ define i1 @test_eq_for_signed_cmp(i32 noundef %v0, i32 noundef %v1, i32 noundef ; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i32 [[V0]], [[V1:%.*]] ; CHECK-NEXT: [[AND0:%.*]] = and i1 [[CMP1]], [[CMP]] ; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[V1]], [[V2]] -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[AND1:%.*]] = and i1 false, [[AND0]] +; CHECK-NEXT: ret i1 [[AND1]] ; entry: %cmp = icmp eq i32 %v2, %v0 @@ -456,7 +457,8 @@ define i1 @test_eq_for_signed_cmp_with_decompsition(i32 noundef %v0, i32 noundef ; CHECK-NEXT: [[AND1:%.*]] = and i1 [[AND0]], [[CMP2]] ; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[CMP3]] ; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[V1]], [[V2]] -; CHECK-NEXT: ret i1 false +; CHECK-NEXT: [[AND3:%.*]] = and i1 false, [[AND2]] +; CHECK-NEXT: ret i1 [[AND3]] ; entry: %v0add = add nsw i32 %v0, %addend0 diff --git a/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll b/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll index c9f4984bcba60..52094914f6962 100644 --- a/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll +++ b/llvm/test/Transforms/ConstraintElimination/gep-arithmetic-signed-predicates.ll @@ -611,12 +611,14 @@ define i4 
@ptr_N_signed_positive_assume(ptr %src, ptr %lower, ptr %upper, i16 %N ; CHECK: step.check: ; CHECK-NEXT: [[STEP_POS:%.*]] = icmp sge i16 [[STEP:%.*]], 0 ; CHECK-NEXT: [[STEP_SLT_N:%.*]] = icmp slt i16 [[STEP]], [[N]] -; CHECK-NEXT: br i1 false, label [[PTR_CHECK:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[AND_STEP:%.*]] = and i1 false, [[STEP_SLT_N]] +; CHECK-NEXT: br i1 [[AND_STEP]], label [[PTR_CHECK:%.*]], label [[EXIT:%.*]] ; CHECK: ptr.check: ; CHECK-NEXT: [[SRC_STEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i16 [[STEP]] ; CHECK-NEXT: [[CMP_STEP_START:%.*]] = icmp slt ptr [[SRC_STEP]], [[LOWER]] ; CHECK-NEXT: [[CMP_STEP_END:%.*]] = icmp sge ptr [[SRC_STEP]], [[UPPER]] -; CHECK-NEXT: br i1 true, label [[TRAP_BB]], label [[EXIT]] +; CHECK-NEXT: [[OR_CHECK:%.*]] = or i1 true, [[CMP_STEP_END]] +; CHECK-NEXT: br i1 [[OR_CHECK]], label [[TRAP_BB]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret i4 3 ; diff --git a/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll b/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll index d90b986c8e539..08b25c6065aac 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-precondition-overflow-check.ll @@ -36,7 +36,8 @@ define i1 @overflow_check_2_and(ptr %dst) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DST_5:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 5 ; CHECK-NEXT: [[DST_5_UGE:%.*]] = icmp uge ptr [[DST_5]], [[DST]] -; CHECK-NEXT: br i1 [[DST_5_UGE]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 true, [[DST_5_UGE]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[DST_4:%.*]] = getelementptr i32, ptr [[DST]], i64 4 ; CHECK-NEXT: [[TRUE_DST_4_UGE:%.*]] = icmp uge ptr [[DST_4]], [[DST]] @@ -64,7 +65,8 @@ define i1 @overflow_check_3_and(ptr %dst) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DST_5:%.*]] = 
getelementptr i32, ptr [[DST:%.*]], i64 5 ; CHECK-NEXT: [[DST_5_UGE:%.*]] = icmp uge ptr [[DST_5]], [[DST]] -; CHECK-NEXT: br i1 [[DST_5_UGE]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 true, [[DST_5_UGE]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[DST_4:%.*]] = getelementptr i32, ptr [[DST]], i64 4 ; CHECK-NEXT: [[DST_4_UGE:%.*]] = icmp uge ptr [[DST_4]], [[DST]] @@ -96,7 +98,8 @@ define i1 @overflow_check_4_and(ptr %dst) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DST_5:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 5 ; CHECK-NEXT: [[DST_5_UGE:%.*]] = icmp uge ptr [[DST_5]], [[DST]] -; CHECK-NEXT: br i1 [[DST_5_UGE]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i1 true, [[DST_5_UGE]] +; CHECK-NEXT: br i1 [[AND]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[DST_4:%.*]] = getelementptr i32, ptr [[DST]], i64 4 ; CHECK-NEXT: [[TRUE_DST_4_UGE:%.*]] = icmp uge ptr [[DST_4]], [[DST]] @@ -149,7 +152,8 @@ define i1 @overflow_check_3_or(ptr %dst) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DST_5:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 5 ; CHECK-NEXT: [[DST_5_UGE:%.*]] = icmp uge ptr [[DST_5]], [[DST]] -; CHECK-NEXT: br i1 [[DST_5_UGE]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 false, [[DST_5_UGE]] +; CHECK-NEXT: br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: [[DST_4:%.*]] = getelementptr i32, ptr [[DST]], i64 4 ; CHECK-NEXT: [[TRUE_DST_4_UGE:%.*]] = icmp uge ptr [[DST_4]], [[DST]] diff --git a/llvm/test/Transforms/ConstraintElimination/loops-bottom-tested-pointer-cmps.ll b/llvm/test/Transforms/ConstraintElimination/loops-bottom-tested-pointer-cmps.ll index 91546d4abf438..279238bea1842 100644 --- a/llvm/test/Transforms/ConstraintElimination/loops-bottom-tested-pointer-cmps.ll +++ b/llvm/test/Transforms/ConstraintElimination/loops-bottom-tested-pointer-cmps.ll @@ -23,7 
+23,8 @@ define void @checks_in_loops_removable(ptr %ptr, ptr %lower, ptr %upper, i8 %n) ; CHECK-NEXT: [[PTR_IV:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i16 [[IV]] ; CHECK-NEXT: [[CMP_PTR_IV_LOWER:%.*]] = icmp ugt ptr [[LOWER]], [[PTR_IV]] ; CHECK-NEXT: [[CMP_PTR_IV_UPPER:%.*]] = icmp ule ptr [[UPPER]], [[PTR_IV]] -; CHECK-NEXT: br i1 [[CMP_PTR_IV_UPPER]], label [[TRAP]], label [[LOOP_LATCH]] +; CHECK-NEXT: [[OR:%.*]] = or i1 false, [[CMP_PTR_IV_UPPER]] +; CHECK-NEXT: br i1 [[OR]], label [[TRAP]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV]], 1 @@ -87,13 +88,14 @@ define void @some_checks_in_loops_removable(ptr %ptr, ptr %lower, ptr %upper, i8 ; CHECK-NEXT: [[PTR_IV:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i16 [[IV]] ; CHECK-NEXT: [[CMP_PTR_IV_LOWER:%.*]] = icmp ugt ptr [[LOWER]], [[PTR_IV]] ; CHECK-NEXT: [[CMP_PTR_IV_UPPER:%.*]] = icmp ule ptr [[UPPER]], [[PTR_IV]] -; CHECK-NEXT: br i1 [[CMP_PTR_IV_UPPER]], label [[TRAP]], label [[LOOP_BODY:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 false, [[CMP_PTR_IV_UPPER]] +; CHECK-NEXT: br i1 [[OR]], label [[TRAP]], label [[LOOP_BODY:%.*]] ; CHECK: loop.body: ; CHECK-NEXT: [[IV_1:%.*]] = add nuw nsw i16 [[IV]], 1 ; CHECK-NEXT: [[PTR_IV_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i16 [[IV_1]] ; CHECK-NEXT: [[CMP_PTR_IV_1_UPPER:%.*]] = icmp ule ptr [[UPPER]], [[PTR_IV_1]] ; CHECK-NEXT: [[OR_1:%.*]] = or i1 false, [[CMP_PTR_IV_1_UPPER]] -; CHECK-NEXT: br i1 [[CMP_PTR_IV_UPPER]], label [[TRAP]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[OR]], label [[TRAP]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV]], 1 @@ -163,13 +165,14 @@ define void @no_checks_in_loops_removable(ptr %ptr, ptr %lower, ptr %upper, i8 % ; CHECK-NEXT: [[PTR_IV:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i16 [[IV]] ; CHECK-NEXT: 
[[CMP_PTR_IV_LOWER:%.*]] = icmp ugt ptr [[LOWER]], [[PTR_IV]] ; CHECK-NEXT: [[CMP_PTR_IV_UPPER:%.*]] = icmp ule ptr [[UPPER]], [[PTR_IV]] -; CHECK-NEXT: br i1 [[CMP_PTR_IV_UPPER]], label [[TRAP]], label [[LOOP_BODY:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 false, [[CMP_PTR_IV_UPPER]] +; CHECK-NEXT: br i1 [[OR]], label [[TRAP]], label [[LOOP_BODY:%.*]] ; CHECK: loop.body: ; CHECK-NEXT: [[IV_1:%.*]] = add nuw nsw i16 [[IV]], 1 ; CHECK-NEXT: [[PTR_IV_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i16 [[IV_1]] ; CHECK-NEXT: [[CMP_PTR_IV_1_UPPER:%.*]] = icmp ule ptr [[UPPER]], [[PTR_IV_1]] ; CHECK-NEXT: [[OR_1:%.*]] = or i1 false, [[CMP_PTR_IV_1_UPPER]] -; CHECK-NEXT: br i1 [[CMP_PTR_IV_UPPER]], label [[TRAP]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[OR]], label [[TRAP]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i16 [[IV]], 1 diff --git a/llvm/test/Transforms/ConstraintElimination/loops-header-tested-pointer-cmps.ll b/llvm/test/Transforms/ConstraintElimination/loops-header-tested-pointer-cmps.ll index df0cb40965430..1842ca2d82545 100644 --- a/llvm/test/Transforms/ConstraintElimination/loops-header-tested-pointer-cmps.ll +++ b/llvm/test/Transforms/ConstraintElimination/loops-header-tested-pointer-cmps.ll @@ -212,7 +212,8 @@ define void @test2_with_ne(ptr %src, ptr %lower, ptr %upper, i8 %N) { ; CHECK-NEXT: [[SRC_IV:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 [[IV]] ; CHECK-NEXT: [[CMP_IV_START:%.*]] = icmp ult ptr [[SRC_IV]], [[LOWER]] ; CHECK-NEXT: [[CMP_IV_END:%.*]] = icmp uge ptr [[SRC_IV]], [[UPPER]] -; CHECK-NEXT: br i1 [[CMP_IV_END]], label [[TRAP_BB]], label [[LOOP_BODY_1:%.*]] +; CHECK-NEXT: [[OR_1:%.*]] = or i1 false, [[CMP_IV_END]] +; CHECK-NEXT: br i1 [[OR_1]], label [[TRAP_BB]], label [[LOOP_BODY_1:%.*]] ; CHECK: loop.body.1: ; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i8 [[IV]], 1 ; CHECK-NEXT: [[SRC_IV_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 
[[ADD_1]] @@ -305,7 +306,8 @@ define void @test3(ptr %src, ptr %lower, ptr %upper, i8 %N) { ; CHECK-NEXT: [[SRC_IV:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 [[IV]] ; CHECK-NEXT: [[CMP_IV_START:%.*]] = icmp ult ptr [[SRC_IV]], [[LOWER]] ; CHECK-NEXT: [[CMP_IV_END:%.*]] = icmp uge ptr [[SRC_IV]], [[UPPER]] -; CHECK-NEXT: br i1 [[CMP_IV_END]], label [[TRAP_BB]], label [[LOOP_BODY_1:%.*]] +; CHECK-NEXT: [[OR_1:%.*]] = or i1 false, [[CMP_IV_END]] +; CHECK-NEXT: br i1 [[OR_1]], label [[TRAP_BB]], label [[LOOP_BODY_1:%.*]] ; CHECK: loop.body.1: ; CHECK-NEXT: [[SRC_IV_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 [[NEXT]] ; CHECK-NEXT: [[CMP_IV_1_END:%.*]] = icmp uge ptr [[SRC_IV_1]], [[UPPER]] diff --git a/llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll b/llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll index 4303cacc59ed1..f5c108822b8cd 100644 --- a/llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll +++ b/llvm/test/Transforms/ConstraintElimination/or-implied-by-operands.ll @@ -6,7 +6,8 @@ define i1 @test_second_or_condition_implied_by_first(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 true, label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[T_1]] +; CHECK-NEXT: br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -30,7 +31,8 @@ define i1 @test_first_or_condition_implied_by_second_ops(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 true, label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 [[T_1]], true +; CHECK-NEXT: br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -103,7 +105,8 @@ define i1 
@test_same_cond_for_or(i8 %x) { ; CHECK-LABEL: @test_same_cond_for_or( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 false, [[C_1]] +; CHECK-NEXT: br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -149,7 +152,8 @@ define i1 @test_second_or_condition_not_implied_by_first(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_2]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 [[C_2]], false +; CHECK-NEXT: br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 false ; CHECK: else: @@ -240,7 +244,8 @@ define i1 @test_or_used_in_false_branch(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ule i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ule i8 [[X]], 5 -; CHECK-NEXT: br i1 [[C_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 [[C_1]], false +; CHECK-NEXT: br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 [[T_1]] ; CHECK: else: @@ -265,7 +270,8 @@ define i1 @test_or_used_in_false_branch2(i8 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i8 [[X:%.*]], 10 ; CHECK-NEXT: [[T_1:%.*]] = icmp ugt i8 [[X]], 5 -; CHECK-NEXT: br i1 [[T_1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 false, [[T_1]] +; CHECK-NEXT: br i1 [[OR]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: ; CHECK-NEXT: ret i1 [[T_1]] ; CHECK: else: @@ -301,17 +307,3 @@ entry: %or = select i1 %cmp.eq, i1 true, i1 %cmp.eq.1 ret i1 %or } - -define i1 @test_or_disjoint_set_operand(i8 %x) { -; CHECK-LABEL: @test_or_disjoint_set_operand( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[X:%.*]], 1 -; CHECK-NEXT: 
[[CMP2:%.*]] = icmp ne i8 [[X]], 0 -; CHECK-NEXT: ret i1 true -; -entry: - %cmp1 = icmp slt i8 %x, 1 - %cmp2 = icmp ne i8 %x, 0 - %or = or disjoint i1 %cmp2, %cmp1 - ret i1 %or -} diff --git a/llvm/test/Transforms/ConstraintElimination/or.ll b/llvm/test/Transforms/ConstraintElimination/or.ll index 0827669f1bcbe..b401d6f181369 100644 --- a/llvm/test/Transforms/ConstraintElimination/or.ll +++ b/llvm/test/Transforms/ConstraintElimination/or.ll @@ -124,7 +124,8 @@ define i1 @test_or_chain_ule_1(i4 %x, i4 %y, i4 %z, i4 %a, i4 %b) { ; CHECK-NEXT: [[C_3:%.*]] = icmp ule i4 2, [[X]] ; CHECK-NEXT: [[C_4:%.*]] = icmp ule i4 2, [[A:%.*]] ; CHECK-NEXT: [[OR_1:%.*]] = or i1 [[C_1]], [[C_2]] -; CHECK-NEXT: [[OR_3:%.*]] = or i1 [[C_4]], true +; CHECK-NEXT: [[OR_2:%.*]] = or i1 [[OR_1]], true +; CHECK-NEXT: [[OR_3:%.*]] = or i1 [[C_4]], [[OR_2]] ; CHECK-NEXT: br i1 [[OR_3]], label [[BB1:%.*]], label [[EXIT:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[C_5:%.*]] = icmp ule i4 [[X]], [[Z]] diff --git a/llvm/test/Transforms/ForcedFunctionAttrs/open-file-error.ll b/llvm/test/Transforms/ForcedFunctionAttrs/open-file-error.ll deleted file mode 100644 index 61db001d7eb1e..0000000000000 --- a/llvm/test/Transforms/ForcedFunctionAttrs/open-file-error.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not opt -disable-output -passes='forceattrs' -forceattrs-csv-path="%S/CannotOpenFile.csv" %s 2>&1 | FileCheck -DMSG=%errc_ENOENT %s - -; CHECK: error: cannot open CSV file: [[MSG]] -define void @first_function() { - ret void -} diff --git a/llvm/test/Transforms/GVN/phi.ll b/llvm/test/Transforms/GVN/phi.ll index 5b607f7559c1b..5d4f227132a6f 100644 --- a/llvm/test/Transforms/GVN/phi.ll +++ b/llvm/test/Transforms/GVN/phi.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=gvn < %s | FileCheck --check-prefixes=CHECK,MDEP %s -; RUN: opt -S -passes='gvn' < %s | FileCheck %s +; RUN: opt -S -passes=gvn < %s | FileCheck %s define i64 @test1(i1 %c, i64 %a, i64 
%b) { @@ -199,5 +198,3 @@ next: %phi = phi i64 [%a, %merge] ret i64 %phi } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; MDEP: {{.*}} diff --git a/llvm/test/Transforms/GVN/pre-compare.ll b/llvm/test/Transforms/GVN/pre-compare.ll index 574d40dfb71d5..ea8fbce01bd6c 100644 --- a/llvm/test/Transforms/GVN/pre-compare.ll +++ b/llvm/test/Transforms/GVN/pre-compare.ll @@ -1,6 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=gvn -S < %s | FileCheck --check-prefixes=CHECK,MDEP %s -; RUN: opt -passes='gvn' -S < %s | FileCheck --check-prefixes=CHECK,MSSA %s +; RUN: opt -passes=gvn -S < %s | FileCheck %s ; C source: ; @@ -39,28 +37,6 @@ @.str3 = private unnamed_addr constant [12 x i8] c"step 2: %d\0A\00", align 1 define void @f(i32 %x) noreturn nounwind uwtable ssp { -; CHECK-LABEL: define void @f( -; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 1 -; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_COND_PREHEADER:.*]], label %[[IF_THEN:.*]] -; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 2 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP1]], ptr @.str, ptr @.str1 -; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @puts(ptr [[COND]]) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: br label %[[FOR_COND_PREHEADER]] -; CHECK: [[FOR_COND_PREHEADER]]: -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[X]], 2 -; CHECK-NEXT: br label %[[FOR_COND:.*]] -; CHECK: [[FOR_COND]]: -; CHECK-NEXT: [[CALL2:%.*]] = tail call i32 @puts(ptr @.str2) #[[ATTR1]] -; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_COND_BACKEDGE:.*]], label %[[IF_END5:.*]] -; CHECK: [[IF_END5]]: -; CHECK-NEXT: [[CALL6:%.*]] = tail call i32 (ptr, ...) 
@printf(ptr @.str3, i32 [[X]]) #[[ATTR1]] -; CHECK-NEXT: br label %[[FOR_COND_BACKEDGE]] -; CHECK: [[FOR_COND_BACKEDGE]]: -; CHECK-NEXT: br label %[[FOR_COND]] -; entry: %cmp = icmp eq i32 %x, 1 br i1 %cmp, label %for.cond.preheader, label %if.then @@ -90,6 +66,3 @@ for.cond.backedge: ; preds = %if.end5, %for.cond declare i32 @puts(ptr nocapture) nounwind declare i32 @printf(ptr nocapture, ...) nounwind -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; MDEP: {{.*}} -; MSSA: {{.*}} diff --git a/llvm/test/Transforms/GVN/readattrs.ll b/llvm/test/Transforms/GVN/readattrs.ll index be018834014d5..b16c53adc0d4d 100644 --- a/llvm/test/Transforms/GVN/readattrs.ll +++ b/llvm/test/Transforms/GVN/readattrs.ll @@ -1,6 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=gvn -S -o - < %s | FileCheck --check-prefixes=CHECK,MDEP %s -; RUN: opt -passes='gvn' -S -o - < %s | FileCheck --check-prefixes=CHECK,MSSA %s +; RUN: opt -passes=gvn -S -o - < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" @@ -8,24 +6,12 @@ target triple = "x86_64-unknown-linux-gnu" declare void @use(ptr readonly nocapture) define i8 @test() { -; MDEP-LABEL: define i8 @test() { -; MDEP-NEXT: [[A:%.*]] = alloca i8, align 1 -; MDEP-NEXT: store i8 1, ptr [[A]], align 1 -; MDEP-NEXT: call void @use(ptr [[A]]) -; MDEP-NEXT: ret i8 1 -; -; MSSA-LABEL: define i8 @test() { -; MSSA-NEXT: [[A:%.*]] = alloca i8, align 1 -; MSSA-NEXT: store i8 1, ptr [[A]], align 1 -; MSSA-NEXT: call void @use(ptr [[A]]) -; MSSA-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 1 -; MSSA-NEXT: ret i8 [[B]] -; %a = alloca i8 store i8 1, ptr %a call void @use(ptr %a) %b = load i8, ptr %a ret i8 %b +; CHECK-LABEL: define i8 @test( +; CHECK: call void @use(ptr %a) 
+; CHECK-NEXT: ret i8 1 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/Transforms/GVN/setjmp.ll b/llvm/test/Transforms/GVN/setjmp.ll index 7777038f89cb1..07b7028346760 100644 --- a/llvm/test/Transforms/GVN/setjmp.ll +++ b/llvm/test/Transforms/GVN/setjmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes=gvn < %s | FileCheck --check-prefixes=CHECK,MDEP %s -; RUN: opt -S -passes='gvn' -verify-analysis-invalidation < %s | FileCheck --check-prefixes=CHECK,MSSA %s +; RUN: opt -S -passes=gvn < %s | FileCheck %s + declare i32 @setjmp() returns_twice declare void @longjmp() declare ptr @malloc(i64) @@ -38,32 +38,18 @@ if.end: ; We are still allowed to optimize non-volatile accesses to allocas. define i32 @test_alloca() { -; MDEP-LABEL: define i32 @test_alloca() { -; MDEP-NEXT: [[ALLOC:%.*]] = alloca i43, align 8 -; MDEP-NEXT: store i32 10, ptr [[ALLOC]], align 4 -; MDEP-NEXT: [[SJ:%.*]] = call i32 @setjmp() -; MDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[SJ]], 0 -; MDEP-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -; MDEP: [[IF_THEN]]: -; MDEP-NEXT: store i32 20, ptr [[ALLOC]], align 4 -; MDEP-NEXT: call void @longjmp() -; MDEP-NEXT: unreachable -; MDEP: [[IF_END]]: -; MDEP-NEXT: ret i32 10 -; -; MSSA-LABEL: define i32 @test_alloca() { -; MSSA-NEXT: [[ALLOC:%.*]] = alloca i43, align 8 -; MSSA-NEXT: store i32 10, ptr [[ALLOC]], align 4 -; MSSA-NEXT: [[SJ:%.*]] = call i32 @setjmp() -; MSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[SJ]], 0 -; MSSA-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] -; MSSA: [[IF_THEN]]: -; MSSA-NEXT: store i32 20, ptr [[ALLOC]], align 4 -; MSSA-NEXT: call void @longjmp() -; MSSA-NEXT: unreachable -; MSSA: [[IF_END]]: -; MSSA-NEXT: [[RES:%.*]] = load i32, ptr [[ALLOC]], align 4 -; MSSA-NEXT: ret i32 [[RES]] +; CHECK-LABEL: define i32 
@test_alloca() { +; CHECK-NEXT: [[ALLOC:%.*]] = alloca i43, align 8 +; CHECK-NEXT: store i32 10, ptr [[ALLOC]], align 4 +; CHECK-NEXT: [[SJ:%.*]] = call i32 @setjmp() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SJ]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: store i32 20, ptr [[ALLOC]], align 4 +; CHECK-NEXT: call void @longjmp() +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: ret i32 10 ; %alloc = alloca i43 store i32 10, ptr %alloc, align 4 diff --git a/llvm/test/Transforms/GVN/tbaa.ll b/llvm/test/Transforms/GVN/tbaa.ll index 366dfeca8b758..b5dd3867bdbc2 100644 --- a/llvm/test/Transforms/GVN/tbaa.ll +++ b/llvm/test/Transforms/GVN/tbaa.ll @@ -1,20 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes=gvn -S < %s | FileCheck --check-prefixes=CHECK,MDEP %s -; RUN: opt -passes='gvn' -S < %s | FileCheck --check-prefixes=CHECK,MSSA %s +; RUN: opt -passes=gvn -S < %s | FileCheck %s define i32 @test1(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test1( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test1( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]) -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test1( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !0 %b = call i32 @foo(ptr %p) @@ -23,18 +15,11 @@ define i32 @test1(ptr %p, ptr %q) { } define i32 @test2(ptr %p, ptr %q) { -; 
MDEP-LABEL: define i32 @test2( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test2( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test2( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !0 %b = call i32 @foo(ptr %p), !tbaa !0 @@ -43,18 +28,11 @@ define i32 @test2(ptr %p, ptr %q) { } define i32 @test3(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test3( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test3( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]] -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test3( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !3 %b = call i32 @foo(ptr %p), !tbaa !3 @@ -63,18 +41,11 @@ define i32 @test3(ptr %p, ptr %q) { } define i32 @test4(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test4( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa 
[[TBAA6:![0-9]+]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test4( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test4( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !1 %b = call i32 @foo(ptr %p), !tbaa !0 @@ -83,18 +54,11 @@ define i32 @test4(ptr %p, ptr %q) { } define i32 @test5(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test5( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test5( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA6]] -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test5( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !0 %b = call i32 @foo(ptr %p), !tbaa !1 @@ -103,18 +67,11 @@ define i32 @test5(ptr %p, ptr %q) { } define i32 @test6(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test6( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test6( -; MSSA-SAME: ptr 
[[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]] -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test6( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !0 %b = call i32 @foo(ptr %p), !tbaa !3 @@ -123,18 +80,11 @@ define i32 @test6(ptr %p, ptr %q) { } define i32 @test7(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test7( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test7( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA4]] -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test7( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA7:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !4 %b = call i32 @foo(ptr %p), !tbaa !3 @@ -143,18 +93,10 @@ define i32 @test7(ptr %p, ptr %q) { } define i32 @test8(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test8( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: store i32 15, ptr [[P]], align 4 -; MDEP-NEXT: ret i32 0 -; -; MSSA-LABEL: define i32 @test8( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10:![0-9]+]] -; MSSA-NEXT: store i32 15, ptr [[P]], align 4 -; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, 
!tbaa [[TBAA10]] -; MSSA-NEXT: [[C:%.*]] = sub i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test8( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: store i32 15, ptr [[P]], align 4 +; CHECK-NEXT: ret i32 0 ; ; Since we know the location is invariant, we can forward the ; load across the potentially aliasing store. @@ -167,18 +109,10 @@ define i32 @test8(ptr %p, ptr %q) { } define i32 @test9(ptr %p, ptr %q) { -; MDEP-LABEL: define i32 @test9( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: call void @clobber() -; MDEP-NEXT: ret i32 0 -; -; MSSA-LABEL: define i32 @test9( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]] -; MSSA-NEXT: call void @clobber() -; MSSA-NEXT: [[B:%.*]] = load i32, ptr [[Q]], align 4, !tbaa [[TBAA10]] -; MSSA-NEXT: [[C:%.*]] = sub i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test9( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: ret i32 0 ; ; Since we know the location is invariant, we can forward the ; load across the potentially aliasing store (within the call). @@ -193,18 +127,11 @@ define i32 @test9(ptr %p, ptr %q) { define i32 @test10(ptr %p, ptr %q) { ; If one access encloses the other, then the merged access is the enclosed one ; and not just the common final access type. 
-; MDEP-LABEL: define i32 @test10( -; MDEP-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MDEP-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA10:![0-9]+]] -; MDEP-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] -; MDEP-NEXT: ret i32 [[C]] -; -; MSSA-LABEL: define i32 @test10( -; MSSA-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { -; MSSA-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA13:![0-9]+]] -; MSSA-NEXT: [[B:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA17:![0-9]+]] -; MSSA-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; MSSA-NEXT: ret i32 [[C]] +; CHECK-LABEL: define i32 @test10( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = call i32 @foo(ptr [[P]]), !tbaa [[TBAA10:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] ; %a = call i32 @foo(ptr %p), !tbaa !15 ; TAG_X_i %b = call i32 @foo(ptr %p), !tbaa !19 ; TAG_Y_x_i @@ -238,40 +165,18 @@ declare i32 @foo(ptr) readonly !9 = !{!"yet another root"} !10 = !{!"node", !9, i64 1} ;. -; MDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} -; MDEP: [[META1]] = !{!"C", [[META2:![0-9]+]]} -; MDEP: [[META2]] = !{!"A", [[META3:![0-9]+]]} -; MDEP: [[META3]] = !{!"tbaa root"} -; MDEP: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -; MDEP: [[META5]] = !{!"B", [[META2]]} -; MDEP: [[TBAA6]] = !{[[META2]], [[META2]], i64 0} -; MDEP: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -; MDEP: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]} -; MDEP: [[META9]] = !{!"another root"} -; MDEP: [[TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0} -; MDEP: [[META11]] = !{!"struct X", [[META12]], i64 0} -; MDEP: [[META12]] = !{!"int", [[META13:![0-9]+]], i64 0} -; MDEP: [[META13]] = !{!"char", [[META3]], i64 0} -;. 
-; MSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} -; MSSA: [[META1]] = !{!"C", [[META2:![0-9]+]]} -; MSSA: [[META2]] = !{!"A", [[META3:![0-9]+]]} -; MSSA: [[META3]] = !{!"tbaa root"} -; MSSA: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -; MSSA: [[META5]] = !{!"B", [[META2]]} -; MSSA: [[TBAA6]] = !{[[META2]], [[META2]], i64 0} -; MSSA: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} -; MSSA: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]} -; MSSA: [[META9]] = !{!"another root"} -; MSSA: [[TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0, i64 1} -; MSSA: [[META11]] = !{!"node", [[META12:![0-9]+]]} -; MSSA: [[META12]] = !{!"yet another root"} -; MSSA: [[TBAA13]] = !{[[META14:![0-9]+]], [[META15:![0-9]+]], i64 0} -; MSSA: [[META14]] = !{!"struct X", [[META15]], i64 0} -; MSSA: [[META15]] = !{!"int", [[META16:![0-9]+]], i64 0} -; MSSA: [[META16]] = !{!"char", [[META3]], i64 0} -; MSSA: [[TBAA17]] = !{[[META18:![0-9]+]], [[META15]], i64 0} -; MSSA: [[META18]] = !{!"struct Y", [[META14]], i64 0} +; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"C", [[META2:![0-9]+]]} +; CHECK: [[META2]] = !{!"A", [[META3:![0-9]+]]} +; CHECK: [[META3]] = !{!"tbaa root"} +; CHECK: [[TBAA4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"B", [[META2]]} +; CHECK: [[TBAA6]] = !{[[META2]], [[META2]], i64 0} +; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[META8]] = !{!"scalar type", [[META9:![0-9]+]]} +; CHECK: [[META9]] = !{!"another root"} +; CHECK: [[TBAA10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], i64 0} +; CHECK: [[META11]] = !{!"struct X", [[META12]], i64 0} +; CHECK: [[META12]] = !{!"int", [[META13:![0-9]+]], i64 0} +; CHECK: [[META13]] = !{!"char", [[META3]], i64 0} ;. -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/Transforms/GVN/vscale.ll b/llvm/test/Transforms/GVN/vscale.ll index 646a67d15d392..d7b07b9891c41 100644 --- a/llvm/test/Transforms/GVN/vscale.ll +++ b/llvm/test/Transforms/GVN/vscale.ll @@ -1,22 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -passes=gvn,dce | FileCheck --check-prefixes=CHECK,MDEP %s -; RUN: opt -S < %s -passes='gvn',dce | FileCheck --check-prefixes=CHECK,MSSA %s +; RUN: opt -S < %s -passes=gvn,dce | FileCheck %s ; Analyze Load from clobbering Load. define @load_store_clobber_load(ptr %p) { -; MDEP-LABEL: @load_store_clobber_load( -; MDEP-NEXT: [[LOAD1:%.*]] = load , ptr [[P:%.*]], align 16 -; MDEP-NEXT: store zeroinitializer, ptr undef, align 16 -; MDEP-NEXT: [[ADD:%.*]] = add [[LOAD1]], [[LOAD1]] -; MDEP-NEXT: ret [[ADD]] -; -; MSSA-LABEL: @load_store_clobber_load( -; MSSA-NEXT: [[LOAD1:%.*]] = load , ptr [[P:%.*]], align 16 -; MSSA-NEXT: store zeroinitializer, ptr undef, align 16 -; MSSA-NEXT: [[LOAD2:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: [[ADD:%.*]] = add [[LOAD1]], [[LOAD2]] -; MSSA-NEXT: ret [[ADD]] +; CHECK-LABEL: @load_store_clobber_load( +; CHECK-NEXT: [[LOAD1:%.*]] = load , ptr [[P:%.*]], align 16 +; CHECK-NEXT: store zeroinitializer, ptr undef, align 16 +; CHECK-NEXT: [[ADD:%.*]] = add [[LOAD1]], [[LOAD1]] +; CHECK-NEXT: ret [[ADD]] ; %load1 = load , ptr %p store zeroinitializer, ptr undef @@ -41,18 +33,11 @@ define @load_store_clobber_load_mayalias(ptr %p, ptr %p2) { } define @load_store_clobber_load_noalias(ptr noalias %p, ptr noalias %p2) { -; MDEP-LABEL: @load_store_clobber_load_noalias( -; MDEP-NEXT: [[LOAD1:%.*]] = load , ptr [[P:%.*]], align 16 -; MDEP-NEXT: store zeroinitializer, ptr [[P2:%.*]], align 16 -; MDEP-NEXT: [[ADD:%.*]] = add [[LOAD1]], [[LOAD1]] -; MDEP-NEXT: ret [[ADD]] -; -; MSSA-LABEL: @load_store_clobber_load_noalias( -; MSSA-NEXT: [[LOAD1:%.*]] = load , ptr [[P:%.*]], align 16 
-; MSSA-NEXT: store zeroinitializer, ptr [[P2:%.*]], align 16 -; MSSA-NEXT: [[LOAD2:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: [[ADD:%.*]] = add [[LOAD1]], [[LOAD2]] -; MSSA-NEXT: ret [[ADD]] +; CHECK-LABEL: @load_store_clobber_load_noalias( +; CHECK-NEXT: [[LOAD1:%.*]] = load , ptr [[P:%.*]], align 16 +; CHECK-NEXT: store zeroinitializer, ptr [[P2:%.*]], align 16 +; CHECK-NEXT: [[ADD:%.*]] = add [[LOAD1]], [[LOAD1]] +; CHECK-NEXT: ret [[ADD]] ; %load1 = load , ptr %p store zeroinitializer, ptr %p2 @@ -63,18 +48,11 @@ define @load_store_clobber_load_noalias(ptr noalias %p, ptr n ; BasicAA return MayAlias for %gep1,%gep2, could improve as MustAlias. define i32 @load_clobber_load_gep1(ptr %p) { -; MDEP-LABEL: @load_clobber_load_gep1( -; MDEP-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 1 -; MDEP-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4 -; MDEP-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]] -; MDEP-NEXT: ret i32 [[ADD]] -; -; MSSA-LABEL: @load_clobber_load_gep1( -; MSSA-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 1 -; MSSA-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4 -; MSSA-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP1]], align 4 -; MSSA-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD2]] -; MSSA-NEXT: ret i32 [[ADD]] +; CHECK-LABEL: @load_clobber_load_gep1( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD1]], [[LOAD1]] +; CHECK-NEXT: ret i32 [[ADD]] ; %gep1 = getelementptr , ptr %p, i64 0, i64 1 %load1 = load i32, ptr %gep1 @@ -154,14 +132,9 @@ define @load_clobber_load_sideeffect(ptr %p) { ; Analyze Load from clobbering Store. 
define @store_forward_to_load(ptr %p) { -; MDEP-LABEL: @store_forward_to_load( -; MDEP-NEXT: store zeroinitializer, ptr [[P:%.*]], align 16 -; MDEP-NEXT: ret zeroinitializer -; -; MSSA-LABEL: @store_forward_to_load( -; MSSA-NEXT: store zeroinitializer, ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret [[LOAD]] +; CHECK-LABEL: @store_forward_to_load( +; CHECK-NEXT: store zeroinitializer, ptr [[P:%.*]], align 16 +; CHECK-NEXT: ret zeroinitializer ; store zeroinitializer, ptr %p %load = load , ptr %p @@ -201,15 +174,9 @@ define i32 @store_clobber_load() { declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) define i32 @memset_clobber_load(ptr %p) { -; MDEP-LABEL: @memset_clobber_load( -; MDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false) -; MDEP-NEXT: ret i32 16843009 -; -; MSSA-LABEL: @memset_clobber_load( -; MSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false) -; MSSA-NEXT: [[GEP:%.*]] = getelementptr , ptr [[P]], i64 0, i64 5 -; MSSA-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4 -; MSSA-NEXT: ret i32 [[LOAD]] +; CHECK-LABEL: @memset_clobber_load( +; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 200, i1 false) +; CHECK-NEXT: ret i32 16843009 ; tail call void @llvm.memset.p0.i64(ptr %p, i8 1, i64 200, i1 false) %gep = getelementptr , ptr %p, i64 0, i64 5 @@ -247,28 +214,15 @@ define i32 @memset_clobber_load_nonconst_index(ptr %p, i64 %idx1, i64 %idx2) { ; Load elimination across BBs define ptr @load_from_alloc_replaced_with_undef() { -; MDEP-LABEL: @load_from_alloc_replaced_with_undef( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[A:%.*]] = alloca , align 16 -; MDEP-NEXT: br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; MDEP: if.then: -; MDEP-NEXT: store zeroinitializer, ptr [[A]], align 16 -; MDEP-NEXT: br label [[IF_END]] -; MDEP: if.end: -; MDEP-NEXT: ret ptr [[A]] -; -; MSSA-LABEL: 
@load_from_alloc_replaced_with_undef( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[A:%.*]] = alloca , align 16 -; MSSA-NEXT: [[GEP:%.*]] = getelementptr , ptr [[A]], i64 0, i64 1 -; MSSA-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4 -; MSSA-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[LOAD]], 0 -; MSSA-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; MSSA: if.then: -; MSSA-NEXT: store zeroinitializer, ptr [[A]], align 16 -; MSSA-NEXT: br label [[IF_END]] -; MSSA: if.end: -; MSSA-NEXT: ret ptr [[A]] +; CHECK-LABEL: @load_from_alloc_replaced_with_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca , align 16 +; CHECK-NEXT: br i1 undef, label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store zeroinitializer, ptr [[A]], align 16 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret ptr [[A]] ; entry: %a = alloca @@ -286,29 +240,16 @@ if.end: } define i32 @redundant_load_elimination_1(ptr %p) { -; MDEP-LABEL: @redundant_load_elimination_1( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[GEP:%.*]] = getelementptr , ptr [[P:%.*]], i64 1, i64 1 -; MDEP-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP]], align 4 -; MDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0 -; MDEP-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; MDEP: if.then: -; MDEP-NEXT: br label [[IF_END]] -; MDEP: if.end: -; MDEP-NEXT: ret i32 [[LOAD1]] -; -; MSSA-LABEL: @redundant_load_elimination_1( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[GEP:%.*]] = getelementptr , ptr [[P:%.*]], i64 1, i64 1 -; MSSA-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP]], align 4 -; MSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0 -; MSSA-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; MSSA: if.then: -; MSSA-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP]], align 4 -; MSSA-NEXT: br label [[IF_END]] -; MSSA: if.end: -; MSSA-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOAD2]], [[IF_THEN]] ], [ [[LOAD1]], [[ENTRY:%.*]] ] -; MSSA-NEXT: ret i32 [[RESULT]] +; 
CHECK-LABEL: @redundant_load_elimination_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr , ptr [[P:%.*]], i64 1, i64 1 +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LOAD1]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret i32 [[LOAD1]] ; entry: %gep = getelementptr , ptr %p, i64 1, i64 1 @@ -359,30 +300,17 @@ if.else: } define void @redundant_load_elimination_zero_index(i1 %c, ptr %p, ptr %q) { -; MDEP-LABEL: @redundant_load_elimination_zero_index( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 1 -; MDEP-NEXT: store i32 0, ptr [[GEP1]], align 4 -; MDEP-NEXT: store i32 1, ptr [[P]], align 4 -; MDEP-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] -; MDEP: if.then: -; MDEP-NEXT: store i32 0, ptr [[Q:%.*]], align 4 -; MDEP-NEXT: ret void -; MDEP: if.else: -; MDEP-NEXT: ret void -; -; MSSA-LABEL: @redundant_load_elimination_zero_index( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 1 -; MSSA-NEXT: store i32 0, ptr [[GEP1]], align 4 -; MSSA-NEXT: store i32 1, ptr [[P]], align 4 -; MSSA-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] -; MSSA: if.then: -; MSSA-NEXT: [[T:%.*]] = load i32, ptr [[GEP1]], align 4 -; MSSA-NEXT: store i32 [[T]], ptr [[Q:%.*]], align 4 -; MSSA-NEXT: ret void -; MSSA: if.else: -; MSSA-NEXT: ret void +; CHECK-LABEL: @redundant_load_elimination_zero_index( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4 +; CHECK-NEXT: store i32 1, ptr [[P]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: ret void +; CHECK: 
if.else: +; CHECK-NEXT: ret void ; entry: %gep1 = getelementptr , ptr %p, i64 0, i64 1 @@ -400,34 +328,19 @@ if.else: } define void @redundant_load_elimination_zero_index_1(i1 %c, ptr %p, ptr %q, i64 %i) { -; MDEP-LABEL: @redundant_load_elimination_zero_index_1( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[J:%.*]] = add i64 [[I:%.*]], 1 -; MDEP-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 [[J]] -; MDEP-NEXT: store i32 0, ptr [[GEP1]], align 4 -; MDEP-NEXT: [[GEP2:%.*]] = getelementptr , ptr [[P]], i64 0, i64 [[I]] -; MDEP-NEXT: store i32 1, ptr [[GEP2]], align 4 -; MDEP-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] -; MDEP: if.then: -; MDEP-NEXT: store i32 0, ptr [[Q:%.*]], align 4 -; MDEP-NEXT: ret void -; MDEP: if.else: -; MDEP-NEXT: ret void -; -; MSSA-LABEL: @redundant_load_elimination_zero_index_1( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[J:%.*]] = add i64 [[I:%.*]], 1 -; MSSA-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 [[J]] -; MSSA-NEXT: store i32 0, ptr [[GEP1]], align 4 -; MSSA-NEXT: [[GEP2:%.*]] = getelementptr , ptr [[P]], i64 0, i64 [[I]] -; MSSA-NEXT: store i32 1, ptr [[GEP2]], align 4 -; MSSA-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] -; MSSA: if.then: -; MSSA-NEXT: [[T:%.*]] = load i32, ptr [[GEP1]], align 4 -; MSSA-NEXT: store i32 [[T]], ptr [[Q:%.*]], align 4 -; MSSA-NEXT: ret void -; MSSA: if.else: -; MSSA-NEXT: ret void +; CHECK-LABEL: @redundant_load_elimination_zero_index_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[J:%.*]] = add i64 [[I:%.*]], 1 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr , ptr [[P:%.*]], i64 0, i64 [[J]] +; CHECK-NEXT: store i32 0, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr , ptr [[P]], i64 0, i64 [[I]] +; CHECK-NEXT: store i32 1, ptr [[GEP2]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 0, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: ret void +; CHECK: 
if.else: +; CHECK-NEXT: ret void ; entry: %j = add i64 %i, 1 @@ -478,15 +391,10 @@ if.else: ; Different sizes / types define @load_v16i8_store_v4i32_forward_load(ptr %p, %x) { -; MDEP-LABEL: @load_v16i8_store_v4i32_forward_load( -; MDEP-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MDEP-NEXT: [[TMP1:%.*]] = bitcast [[X]] to -; MDEP-NEXT: ret [[TMP1]] -; -; MSSA-LABEL: @load_v16i8_store_v4i32_forward_load( -; MSSA-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret [[LOAD]] +; CHECK-LABEL: @load_v16i8_store_v4i32_forward_load( +; CHECK-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[LOAD:%.*]] = bitcast [[X]] to +; CHECK-NEXT: ret [[LOAD]] ; store %x, ptr %p %load = load , ptr %p @@ -494,15 +402,10 @@ define @load_v16i8_store_v4i32_forward_load(ptr %p, @load_v4f32_store_v4i32_forward_load(ptr %p, %x) { -; MDEP-LABEL: @load_v4f32_store_v4i32_forward_load( -; MDEP-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MDEP-NEXT: [[TMP1:%.*]] = bitcast [[X]] to -; MDEP-NEXT: ret [[TMP1]] -; -; MSSA-LABEL: @load_v4f32_store_v4i32_forward_load( -; MSSA-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret [[LOAD]] +; CHECK-LABEL: @load_v4f32_store_v4i32_forward_load( +; CHECK-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[LOAD:%.*]] = bitcast [[X]] to +; CHECK-NEXT: ret [[LOAD]] ; store %x, ptr %p %load = load , ptr %p @@ -510,15 +413,10 @@ define @load_v4f32_store_v4i32_forward_load(ptr %p, @load_v4f32_store_v16i8_forward_load(ptr %p, %x) { -; MDEP-LABEL: @load_v4f32_store_v16i8_forward_load( -; MDEP-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MDEP-NEXT: [[TMP1:%.*]] = bitcast [[X]] to -; MDEP-NEXT: ret [[TMP1]] -; -; MSSA-LABEL: @load_v4f32_store_v16i8_forward_load( -; MSSA-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret 
[[LOAD]] +; CHECK-LABEL: @load_v4f32_store_v16i8_forward_load( +; CHECK-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[LOAD:%.*]] = bitcast [[X]] to +; CHECK-NEXT: ret [[LOAD]] ; store %x, ptr %p %load = load , ptr %p @@ -526,15 +424,10 @@ define @load_v4f32_store_v16i8_forward_load(ptr %p, @load_v4i32_store_v4f32_forward_load(ptr %p, %x) { -; MDEP-LABEL: @load_v4i32_store_v4f32_forward_load( -; MDEP-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MDEP-NEXT: [[TMP1:%.*]] = bitcast [[X]] to -; MDEP-NEXT: ret [[TMP1]] -; -; MSSA-LABEL: @load_v4i32_store_v4f32_forward_load( -; MSSA-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret [[LOAD]] +; CHECK-LABEL: @load_v4i32_store_v4f32_forward_load( +; CHECK-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[LOAD:%.*]] = bitcast [[X]] to +; CHECK-NEXT: ret [[LOAD]] ; store %x, ptr %p %load = load , ptr %p @@ -601,16 +494,11 @@ define @load_v2i32_store_v4i32_forward_load_offsetc(ptr %p, < } define @load_v2p0_store_v4i32_forward_load(ptr %p, %x) { -; MDEP-LABEL: @load_v2p0_store_v4i32_forward_load( -; MDEP-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MDEP-NEXT: [[TMP1:%.*]] = bitcast [[X]] to -; MDEP-NEXT: [[TMP2:%.*]] = inttoptr [[TMP1]] to -; MDEP-NEXT: ret [[TMP2]] -; -; MSSA-LABEL: @load_v2p0_store_v4i32_forward_load( -; MSSA-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret [[LOAD]] +; CHECK-LABEL: @load_v2p0_store_v4i32_forward_load( +; CHECK-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [[X]] to +; CHECK-NEXT: [[LOAD:%.*]] = inttoptr [[TMP1]] to +; CHECK-NEXT: ret [[LOAD]] ; store %x, ptr %p %load = load , ptr %p @@ -618,15 +506,10 @@ define @load_v2p0_store_v4i32_forward_load(ptr %p, @load_v2i64_store_v2p0_forward_load(ptr %p, %x) { -; MDEP-LABEL: @load_v2i64_store_v2p0_forward_load( -; MDEP-NEXT: store 
[[X:%.*]], ptr [[P:%.*]], align 16 -; MDEP-NEXT: [[TMP1:%.*]] = ptrtoint [[X]] to -; MDEP-NEXT: ret [[TMP1]] -; -; MSSA-LABEL: @load_v2i64_store_v2p0_forward_load( -; MSSA-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret [[LOAD]] +; CHECK-LABEL: @load_v2i64_store_v2p0_forward_load( +; CHECK-NEXT: store [[X:%.*]], ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[LOAD:%.*]] = ptrtoint [[X]] to +; CHECK-NEXT: ret [[LOAD]] ; store %x, ptr %p %load = load , ptr %p @@ -656,14 +539,9 @@ define <16 x i8> @load_v16i8_store_nxv4i32_forward_load(ptr %p, @load_v16i8_store_v4i32_forward_constant(ptr %p) { -; MDEP-LABEL: @load_v16i8_store_v4i32_forward_constant( -; MDEP-NEXT: store splat (i32 4), ptr [[P:%.*]], align 16 -; MDEP-NEXT: ret bitcast ( splat (i32 4) to ) -; -; MSSA-LABEL: @load_v16i8_store_v4i32_forward_constant( -; MSSA-NEXT: store splat (i32 4), ptr [[P:%.*]], align 16 -; MSSA-NEXT: [[LOAD:%.*]] = load , ptr [[P]], align 16 -; MSSA-NEXT: ret [[LOAD]] +; CHECK-LABEL: @load_v16i8_store_v4i32_forward_constant( +; CHECK-NEXT: store splat (i32 4), ptr [[P:%.*]], align 16 +; CHECK-NEXT: ret bitcast ( splat (i32 4) to ) ; store splat (i32 4), ptr %p %load = load , ptr %p @@ -693,65 +571,35 @@ define {} @load_v16i8_store_v4i32_struct_forward_load(ptr %p, } define { , , , } @bigexample({ , , , } %a) vscale_range(1,16) { -; MDEP-LABEL: @bigexample( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[REF_TMP:%.*]] = alloca { , , , }, align 16 -; MDEP-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[REF_TMP]]) -; MDEP-NEXT: [[A_ELT:%.*]] = extractvalue { , , , } [[A:%.*]], 0 -; MDEP-NEXT: store [[A_ELT]], ptr [[REF_TMP]], align 16 -; MDEP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; MDEP-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4 -; MDEP-NEXT: [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]] -; MDEP-NEXT: [[A_ELT2:%.*]] = extractvalue { , , , } [[A]], 1 -; MDEP-NEXT: store 
[[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16 -; MDEP-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 5 -; MDEP-NEXT: [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP2]] -; MDEP-NEXT: [[A_ELT4:%.*]] = extractvalue { , , , } [[A]], 2 -; MDEP-NEXT: store [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16 -; MDEP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP0]], 48 -; MDEP-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]] -; MDEP-NEXT: [[A_ELT6:%.*]] = extractvalue { , , , } [[A]], 3 -; MDEP-NEXT: store [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16 -; MDEP-NEXT: [[TMP4:%.*]] = bitcast [[A_ELT]] to -; MDEP-NEXT: [[TMP5:%.*]] = insertvalue { , , , } poison, [[TMP4]], 0 -; MDEP-NEXT: [[TMP6:%.*]] = bitcast [[A_ELT2]] to -; MDEP-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP5]], [[TMP6]], 1 -; MDEP-NEXT: [[TMP8:%.*]] = bitcast [[A_ELT4]] to -; MDEP-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP7]], [[TMP8]], 2 -; MDEP-NEXT: [[TMP10:%.*]] = bitcast [[A_ELT6]] to -; MDEP-NEXT: [[TMP11:%.*]] = insertvalue { , , , } [[TMP9]], [[TMP10]], 3 -; MDEP-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]]) -; MDEP-NEXT: ret { , , , } [[TMP11]] -; -; MSSA-LABEL: @bigexample( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[REF_TMP:%.*]] = alloca { , , , }, align 16 -; MSSA-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[REF_TMP]]) -; MSSA-NEXT: [[A_ELT:%.*]] = extractvalue { , , , } [[A:%.*]], 0 -; MSSA-NEXT: store [[A_ELT]], ptr [[REF_TMP]], align 16 -; MSSA-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; MSSA-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4 -; MSSA-NEXT: [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]] -; MSSA-NEXT: [[A_ELT2:%.*]] = extractvalue { , , , } [[A]], 1 -; MSSA-NEXT: store [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16 -; MSSA-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 5 -; MSSA-NEXT: [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 
[[TMP2]] -; MSSA-NEXT: [[A_ELT4:%.*]] = extractvalue { , , , } [[A]], 2 -; MSSA-NEXT: store [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16 -; MSSA-NEXT: [[TMP3:%.*]] = mul i64 [[TMP0]], 48 -; MSSA-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]] -; MSSA-NEXT: [[A_ELT6:%.*]] = extractvalue { , , , } [[A]], 3 -; MSSA-NEXT: store [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16 -; MSSA-NEXT: [[DOTUNPACK:%.*]] = load , ptr [[REF_TMP]], align 16 -; MSSA-NEXT: [[TMP4:%.*]] = insertvalue { , , , } poison, [[DOTUNPACK]], 0 -; MSSA-NEXT: [[DOTUNPACK8:%.*]] = load , ptr [[REF_TMP_REPACK1]], align 16 -; MSSA-NEXT: [[TMP5:%.*]] = insertvalue { , , , } [[TMP4]], [[DOTUNPACK8]], 1 -; MSSA-NEXT: [[DOTUNPACK10:%.*]] = load , ptr [[REF_TMP_REPACK3]], align 16 -; MSSA-NEXT: [[TMP6:%.*]] = insertvalue { , , , } [[TMP5]], [[DOTUNPACK10]], 2 -; MSSA-NEXT: [[DOTUNPACK12:%.*]] = load , ptr [[REF_TMP_REPACK5]], align 16 -; MSSA-NEXT: [[TMP7:%.*]] = insertvalue { , , , } [[TMP6]], [[DOTUNPACK12]], 3 -; MSSA-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]]) -; MSSA-NEXT: ret { , , , } [[TMP7]] +; CHECK-LABEL: @bigexample( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REF_TMP:%.*]] = alloca { , , , }, align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[REF_TMP]]) +; CHECK-NEXT: [[A_ELT:%.*]] = extractvalue { , , , } [[A:%.*]], 0 +; CHECK-NEXT: store [[A_ELT]], ptr [[REF_TMP]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4 +; CHECK-NEXT: [[REF_TMP_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP1]] +; CHECK-NEXT: [[A_ELT2:%.*]] = extractvalue { , , , } [[A]], 1 +; CHECK-NEXT: store [[A_ELT2]], ptr [[REF_TMP_REPACK1]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP0]], 5 +; CHECK-NEXT: [[REF_TMP_REPACK3:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP3]] +; CHECK-NEXT: [[A_ELT4:%.*]] = extractvalue { , , , } 
[[A]], 2 +; CHECK-NEXT: store [[A_ELT4]], ptr [[REF_TMP_REPACK3]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP0]], 48 +; CHECK-NEXT: [[REF_TMP_REPACK5:%.*]] = getelementptr inbounds i8, ptr [[REF_TMP]], i64 [[TMP5]] +; CHECK-NEXT: [[A_ELT6:%.*]] = extractvalue { , , , } [[A]], 3 +; CHECK-NEXT: store [[A_ELT6]], ptr [[REF_TMP_REPACK5]], align 16 +; CHECK-NEXT: [[DOTUNPACK:%.*]] = bitcast [[A_ELT]] to +; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { , , , } poison, [[DOTUNPACK]], 0 +; CHECK-NEXT: [[DOTUNPACK8:%.*]] = bitcast [[A_ELT2]] to +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { , , , } [[TMP6]], [[DOTUNPACK8]], 1 +; CHECK-NEXT: [[DOTUNPACK10:%.*]] = bitcast [[A_ELT4]] to +; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { , , , } [[TMP9]], [[DOTUNPACK10]], 2 +; CHECK-NEXT: [[DOTUNPACK12:%.*]] = bitcast [[A_ELT6]] to +; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { , , , } [[TMP12]], [[DOTUNPACK12]], 3 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull [[REF_TMP]]) +; CHECK-NEXT: ret { , , , } [[TMP15]] ; entry: %ref.tmp = alloca { , , , }, align 16 @@ -795,21 +643,12 @@ entry: } define @scalable_store_to_fixed_load( %.coerce) vscale_range(4,4) { -; MDEP-LABEL: @scalable_store_to_fixed_load( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64 -; MDEP-NEXT: [[TMP0:%.*]] = fadd [[DOTCOERCE:%.*]], [[DOTCOERCE]] -; MDEP-NEXT: store [[TMP0]], ptr [[RETVAL]], align 16 -; MDEP-NEXT: ret [[TMP0]] -; -; MSSA-LABEL: @scalable_store_to_fixed_load( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64 -; MSSA-NEXT: [[TMP0:%.*]] = fadd [[DOTCOERCE:%.*]], [[DOTCOERCE]] -; MSSA-NEXT: store [[TMP0]], ptr [[RETVAL]], align 16 -; MSSA-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64 -; MSSA-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4f32.v16f32( poison, <16 x float> [[TMP1]], i64 0) -; MSSA-NEXT: ret [[CAST_SCALABLE]] +; CHECK-LABEL: @scalable_store_to_fixed_load( +; 
CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca { <16 x float> }, align 64 +; CHECK-NEXT: [[TMP0:%.*]] = fadd [[DOTCOERCE:%.*]], [[DOTCOERCE]] +; CHECK-NEXT: store [[TMP0]], ptr [[RETVAL]], align 16 +; CHECK-NEXT: ret [[TMP0]] ; entry: %retval = alloca { <16 x float> } @@ -822,19 +661,11 @@ entry: ; Here, only the lower bound for the vscale is known, but this is enough to allow a forward to a load to 16 elements. define @scalable_store_to_fixed_load_only_lower_bound( %a) vscale_range(4) { -; MDEP-LABEL: @scalable_store_to_fixed_load_only_lower_bound( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[RETVAL:%.*]] = alloca { }, align 16 -; MDEP-NEXT: store [[A:%.*]], ptr [[RETVAL]], align 16 -; MDEP-NEXT: ret [[A]] -; -; MSSA-LABEL: @scalable_store_to_fixed_load_only_lower_bound( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[RETVAL:%.*]] = alloca { }, align 16 -; MSSA-NEXT: store [[A:%.*]], ptr [[RETVAL]], align 16 -; MSSA-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[RETVAL]], align 64 -; MSSA-NEXT: [[CAST_SCALABLE:%.*]] = tail call @llvm.vector.insert.nxv4f32.v16f32( poison, <16 x float> [[TMP0]], i64 0) -; MSSA-NEXT: ret [[CAST_SCALABLE]] +; CHECK-LABEL: @scalable_store_to_fixed_load_only_lower_bound( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RETVAL:%.*]] = alloca { }, align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[RETVAL]], align 16 +; CHECK-NEXT: ret [[A]] ; entry: %retval = alloca { } @@ -921,19 +752,12 @@ entry: ; This function does not have a fixed vscale, but the loaded vector is still known ; to be smaller or equal in size compared to the stored vector. 
define <4 x float> @scalable_store_to_small_fixed_load( %a) { -; MDEP-LABEL: @scalable_store_to_small_fixed_load( -; MDEP-NEXT: entry: -; MDEP-NEXT: [[PTR:%.*]] = alloca , align 16 -; MDEP-NEXT: store [[A:%.*]], ptr [[PTR]], align 16 -; MDEP-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.nxv4f32( [[A]], i64 0) -; MDEP-NEXT: ret <4 x float> [[TMP0]] -; -; MSSA-LABEL: @scalable_store_to_small_fixed_load( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[PTR:%.*]] = alloca , align 16 -; MSSA-NEXT: store [[A:%.*]], ptr [[PTR]], align 16 -; MSSA-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[PTR]], align 16 -; MSSA-NEXT: ret <4 x float> [[TMP0]] +; CHECK-LABEL: @scalable_store_to_small_fixed_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR:%.*]] = alloca , align 16 +; CHECK-NEXT: store [[A:%.*]], ptr [[PTR]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.nxv4f32( [[A]], i64 0) +; CHECK-NEXT: ret <4 x float> [[TMP0]] ; entry: %ptr = alloca diff --git a/llvm/test/Transforms/LoopUnroll/peel-last-iteration.ll b/llvm/test/Transforms/LoopUnroll/peel-last-iteration.ll index 78a13b83ec8d1..e04786e50e96c 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-last-iteration.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-last-iteration.ll @@ -1,33 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -p loop-unroll -unroll-full-max-count=0 -S %s | FileCheck %s +; RUN: opt -p loop-unroll -S %s | FileCheck %s define i64 @peel_single_block_loop_iv_step_1() { ; CHECK-LABEL: define i64 @peel_single_block_loop_iv_step_1() { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ] -; CHECK-NEXT: call void @foo(i32 20) -; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-NEXT: [[EC1:%.*]] = icmp ne i64 [[IV_NEXT1]], 63 -; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP]], 
label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[EXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ] -; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] -; CHECK: [[LOOP_PEEL]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[CMP18_NOT:%.*]] = icmp eq i64 [[IV]], 63 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP18_NOT]], i32 10, i32 20 ; CHECK-NEXT: call void @foo(i32 [[COND]]) -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 64 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] -; CHECK: [[EXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] -; CHECK: [[LOOP_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i64 [[IV]] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[IV_LCSSA]] ; entry: br label %loop @@ -80,33 +68,23 @@ exit: ret i64 %iv } + + define i64 @peel_single_block_loop_iv_step_1_eq_pred() { ; CHECK-LABEL: define i64 @peel_single_block_loop_iv_step_1_eq_pred() { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_LCSSA:%.*]], %[[LOOP]] ] -; CHECK-NEXT: call void @foo(i32 20) -; CHECK-NEXT: [[IV_LCSSA]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_LCSSA]], 63 -; CHECK-NEXT: br i1 [[CMP_PEEL]], label %[[EXIT_PEEL_BEGIN:.*]], label %[[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] -; CHECK: [[EXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_LCSSA]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_LCSSA1:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ] -; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] -; CHECK: 
[[LOOP_PEEL]]: -; CHECK-NEXT: [[CMP_PEEL1:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP_PEEL1]], i32 10, i32 20 +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP18_NOT:%.*]] = icmp eq i64 [[IV]], 63 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP18_NOT]], i32 10, i32 20 ; CHECK-NEXT: call void @foo(i32 [[COND]]) -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV_NEXT_LCSSA]], 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 64 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] -; CHECK: [[EXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] -; CHECK: [[LOOP_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i64 [[IV_LCSSA]] ; entry: br label %loop @@ -158,40 +136,22 @@ exit: define i64 @peel_single_block_loop_iv_step_1_nested_loop() { ; CHECK-LABEL: define i64 @peel_single_block_loop_iv_step_1_nested_loop() { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] ; CHECK: [[OUTER_HEADER]]: -; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: call void @foo(i32 20) -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63 -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[OUTER_LATCH_PEEL_BEGIN:.*]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[OUTER_LATCH_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LOOP]] ] -; CHECK-NEXT: 
[[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ] -; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] -; CHECK: [[LOOP_PEEL]]: -; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 -; CHECK-NEXT: [[COND_PEEL:%.*]] = select i1 [[CMP_PEEL]], i32 10, i32 20 +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[IV_NEXT_PEEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[CMP18_NOT_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 +; CHECK-NEXT: [[COND_PEEL:%.*]] = select i1 [[CMP18_NOT_PEEL]], i32 10, i32 20 ; CHECK-NEXT: call void @foo(i32 [[COND_PEEL]]) -; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add i64 [[IV_NEXT_LCSSA]], 1 +; CHECK-NEXT: [[IV_NEXT_PEEL]] = add i64 [[IV_NEXT_LCSSA]], 1 ; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i64 [[IV_NEXT_PEEL]], 64 -; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[OUTER_LATCH_PEEL_NEXT:.*]], label %[[OUTER_LATCH_PEEL_NEXT]] -; CHECK: [[OUTER_LATCH_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] -; CHECK: [[LOOP_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[OUTER_LATCH:.*]] ; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV_NEXT_LCSSA]], %[[LOOP]] ] ; CHECK-NEXT: call void @foo(i32 1) -; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i64 [[OUTER_IV]], 1 -; CHECK-NEXT: [[OUTER_EC:%.*]] = icmp ne i64 [[OUTER_IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[OUTER_EC]], label %[[EXIT:.*]], label %[[OUTER_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[IV_LCSSA_LCSSA:%.*]] = phi i64 [ [[IV_NEXT_LCSSA]], %[[OUTER_LATCH]] ] -; CHECK-NEXT: ret i64 [[IV_LCSSA_LCSSA]] +; CHECK-NEXT: ret i64 [[IV_LCSSA]] ; entry: br label %outer.header @@ -224,40 +184,22 @@ define i64 @peel_multi_block_loop_iv_step_1() { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] -; CHECK-NEXT: call void @foo(i32 20) -; CHECK-NEXT: [[C:%.*]] = call i1 @cond() -; 
CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LATCH]] -; CHECK: [[THEN]]: -; CHECK-NEXT: call void @foo(i32 20) -; CHECK-NEXT: br label %[[LATCH]] -; CHECK: [[LATCH]]: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 63 -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[EXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_NEXT]], %[[LATCH]] ] -; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LATCH]] ] -; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] -; CHECK: [[LOOP_PEEL]]: -; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 -; CHECK-NEXT: [[COND_PEEL:%.*]] = select i1 [[CMP_PEEL]], i32 10, i32 20 +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_PEEL:%.*]], %[[LATCH:.*]] ] +; CHECK-NEXT: [[CMP18_NOT_PEEL:%.*]] = icmp eq i64 [[IV_NEXT_LCSSA]], 63 +; CHECK-NEXT: [[COND_PEEL:%.*]] = select i1 [[CMP18_NOT_PEEL]], i32 10, i32 20 ; CHECK-NEXT: call void @foo(i32 [[COND_PEEL]]) ; CHECK-NEXT: [[C_PEEL:%.*]] = call i1 @cond() -; CHECK-NEXT: br i1 [[C_PEEL]], label %[[THEN_PEEL:.*]], label %[[LATCH_PEEL:.*]] -; CHECK: [[THEN_PEEL]]: +; CHECK-NEXT: br i1 [[C_PEEL]], label %[[THEN:.*]], label %[[LATCH]] +; CHECK: [[THEN]]: ; CHECK-NEXT: call void @foo(i32 [[COND_PEEL]]) -; CHECK-NEXT: br label %[[LATCH_PEEL]] -; CHECK: [[LATCH_PEEL]]: -; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add i64 [[IV_NEXT_LCSSA]], 1 +; CHECK-NEXT: br label %[[LATCH]] +; CHECK: [[LATCH]]: +; CHECK-NEXT: [[IV_NEXT_PEEL]] = add i64 [[IV_NEXT_LCSSA]], 1 ; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp ne i64 [[IV_NEXT_PEEL]], 64 -; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] -; CHECK: [[EXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] -; CHECK: [[LOOP_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[LOOP]], label %[[EXIT:.*]] ; 
CHECK: [[EXIT]]: -; CHECK-NEXT: ret i64 [[IV_NEXT_LCSSA]] +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV_NEXT_LCSSA]], %[[LATCH]] ] +; CHECK-NEXT: ret i64 [[IV_LCSSA]] ; entry: br label %loop @@ -322,80 +264,6 @@ exit: ret i64 %iv } -define i64 @peel_single_block_loop_iv_step_1_btc_0() { -; CHECK-LABEL: define i64 @peel_single_block_loop_iv_step_1_btc_0() { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 10, i32 20 -; CHECK-NEXT: call void @foo(i32 [[COND]]) -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1 -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP]] ] -; CHECK-NEXT: ret i64 [[IV_LCSSA]] -; -entry: - br label %loop - -loop: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %cmp = icmp eq i64 %iv, 0 - %cond = select i1 %cmp, i32 10, i32 20 - call void @foo(i32 %cond) - %iv.next = add i64 %iv, 1 - %ec = icmp ne i64 %iv.next, 1 - br i1 %ec, label %loop, label %exit - -exit: - ret i64 %iv -} - -define i64 @peel_single_block_loop_iv_step_1_btc_1() { -; CHECK-LABEL: define i64 @peel_single_block_loop_iv_step_1_btc_1() { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT1:%.*]], %[[LOOP]] ] -; CHECK-NEXT: call void @foo(i32 20) -; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[EXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: [[EXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT1]], %[[LOOP]] ] -; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV1]], %[[LOOP]] ] -; CHECK-NEXT: br label %[[LOOP_PEEL:.*]] -; 
CHECK: [[LOOP_PEEL]]: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 10, i32 20 -; CHECK-NEXT: call void @foo(i32 [[COND]]) -; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 2 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_PEEL_NEXT:.*]], label %[[EXIT_PEEL_NEXT]] -; CHECK: [[EXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_PEEL_NEXT:.*]] -; CHECK: [[LOOP_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret i64 [[IV]] -; -entry: - br label %loop - -loop: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %cmp = icmp eq i64 %iv, 1 - %cond = select i1 %cmp, i32 10, i32 20 - call void @foo(i32 %cond) - %iv.next = add i64 %iv, 1 - %ec = icmp ne i64 %iv.next, 2 - br i1 %ec, label %loop, label %exit - -exit: - ret i64 %iv -} define i64 @peel_single_block_loop_iv_step_1_may_execute_only_once(i64 %n) { ; CHECK-LABEL: define i64 @peel_single_block_loop_iv_step_1_may_execute_only_once( @@ -508,45 +376,25 @@ define i32 @peel_loop_with_branch_and_phi_uses(ptr %x, i1 %c) { ; CHECK: [[LOOP_HEADER_PREHEADER]]: ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] ; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[IV_NEXT1:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] -; CHECK-NEXT: [[RED1:%.*]] = phi i32 [ [[ADD1:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] -; CHECK-NEXT: br i1 false, label %[[IF_THEN:.*]], label %[[LOOP_LATCH]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD:%.*]], %[[LOOP_LATCH]] ], [ 0, %[[LOOP_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[IV]], 99 +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[LOOP_LATCH]] ; CHECK: [[IF_THEN]]: ; CHECK-NEXT: tail call void @foo(i32 10) ; CHECK-NEXT: br label %[[LOOP_LATCH]] ; CHECK: [[LOOP_LATCH]]: -; 
CHECK-NEXT: [[GEP_X1:%.*]] = getelementptr inbounds nuw i32, ptr [[X]], i32 [[IV1]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_X1]], align 4 -; CHECK-NEXT: [[ADD1]] = add nsw i32 [[L1]], [[RED1]] -; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[EC1:%.*]] = icmp ne i32 [[IV_NEXT1]], 99 -; CHECK-NEXT: br i1 [[EC1]], label %[[LOOP_HEADER]], label %[[LOOPEXIT_PEEL_BEGIN:.*]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: [[LOOPEXIT_PEEL_BEGIN]]: -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT1]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD1]], %[[LOOP_LATCH]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]] -; CHECK: [[LOOP_HEADER_PEEL]]: -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[IV]], 99 -; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN_PEEL:.*]], label %[[LOOP_LATCH_PEEL:.*]] -; CHECK: [[IF_THEN_PEEL]]: -; CHECK-NEXT: tail call void @foo(i32 10) -; CHECK-NEXT: br label %[[LOOP_LATCH_PEEL]] -; CHECK: [[LOOP_LATCH_PEEL]]: ; CHECK-NEXT: [[GEP_X:%.*]] = getelementptr inbounds nuw i32, ptr [[X]], i32 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_X]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L]], [[RED]] -; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[L]], [[RED]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ne i32 [[IV_NEXT]], 100 -; CHECK-NEXT: br i1 [[EC]], label %[[LOOPEXIT_PEEL_NEXT:.*]], label %[[LOOPEXIT_PEEL_NEXT]] -; CHECK: [[LOOPEXIT_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]] -; CHECK: [[LOOP_HEADER_PEEL_NEXT]]: -; CHECK-NEXT: br label %[[LOOPEXIT:.*]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[LOOPEXIT:.*]] ; CHECK: [[LOOPEXIT]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[LOOP_LATCH]] ] ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 
0, %[[ENTRY]] ], [ [[ADD]], %[[LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD_LCSSA]], %[[LOOPEXIT]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: @@ -579,70 +427,5 @@ exit: ret i32 %sum.0.lcssa } -define i64 @peel_multi_exit_multi_latch_loop_iv_step_1(i64 %N) { -; CHECK-LABEL: define i64 @peel_multi_exit_multi_latch_loop_iv_step_1( -; CHECK-SAME: i64 [[N:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_BE:%.*]], %[[LOOP_BACKEDGE:.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV]], 63 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 10, i32 20 -; CHECK-NEXT: call void @foo(i32 [[COND]]) -; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[C_1]], label %[[THEN:.*]], label %[[ELSE:.*]] -; CHECK: [[THEN]]: -; CHECK-NEXT: call void @foo(i32 20) -; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC_1:%.*]] = icmp ne i64 [[IV_NEXT_1]], 64 -; CHECK-NEXT: br i1 [[EC_1]], label %[[EXIT:.*]], label %[[LOOP_BACKEDGE]] -; CHECK: [[LOOP_BACKEDGE]]: -; CHECK-NEXT: [[IV_BE]] = phi i64 [ [[IV_NEXT_1]], %[[THEN]] ], [ [[IV_NEXT_2:%.*]], %[[ELSE]] ] -; CHECK-NEXT: br label %[[LOOP]] -; CHECK: [[ELSE]]: -; CHECK-NEXT: call void @foo(i32 10) -; CHECK-NEXT: [[IV_NEXT_2]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC_2:%.*]] = icmp ne i64 [[IV_NEXT_2]], 64 -; CHECK-NEXT: br i1 [[EC_2]], label %[[LOOP_BACKEDGE]], label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[ELSE]] ], [ [[IV]], %[[THEN]] ] -; CHECK-NEXT: ret i64 [[IV_LCSSA]] -; -entry: - br label %loop - -loop: - %iv = phi i64 [ 0, %entry ], [ %iv.next.1, %then ], [ %iv.next.2, %else ] - %cmp = icmp eq i64 %iv, 63 - %cond = select i1 %cmp, i32 10, i32 20 - call void @foo(i32 %cond) - %c.1 = icmp eq i64 %iv, %N - br i1 %c.1, label %then, label %else - -then: - call void @foo(i32 20) - %iv.next.1 = add 
i64 %iv, 1 - %ec.1 = icmp ne i64 %iv.next.1, 64 - br i1 %ec.1, label %exit, label %loop - -else: - call void @foo(i32 10) - %iv.next.2 = add i64 %iv, 1 - %ec.2 = icmp ne i64 %iv.next.2, 64 - br i1 %ec.2, label %loop, label %exit - -exit: - ret i64 %iv -} - declare void @foo(i32) declare i1 @cond() -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} -; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll deleted file mode 100644 index 4de0e666149f3..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-iv-simplify.ll +++ /dev/null @@ -1,333 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify)' < %s | FileCheck %s -; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify),function(simplifycfg,dce)' < %s | FileCheck %s --check-prefix=LOOP-DEL - -define void @simple(ptr noalias %a, ptr noalias %b, %c, i64 %N) vscale_range(2, 1024) { -; CHECK-LABEL: define void @simple( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]] -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] -; CHECK-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] 
= mul i64 [[TMP4]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) -; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 -; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP18:%.*]] = add nsw [[C]], [[VP_OP_LOAD1]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 -; CHECK-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP18]], ptr align 4 [[TMP20]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: 
for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; -; LOOP-DEL-LABEL: define void @simple( -; LOOP-DEL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; LOOP-DEL-NEXT: entry: -; LOOP-DEL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N]] -; LOOP-DEL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; LOOP-DEL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 -; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] -; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] -; LOOP-DEL: vector.ph: -; LOOP-DEL-NEXT: br label [[VECTOR_BODY:%.*]] -; LOOP-DEL: vector.body: -; LOOP-DEL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; LOOP-DEL-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] -; LOOP-DEL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP4]], i32 4, i1 true) -; LOOP-DEL-NEXT: [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0 -; LOOP-DEL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]] -; LOOP-DEL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 -; LOOP-DEL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 
[[TMP10]], splat (i1 true), i32 [[TMP5]]) -; LOOP-DEL-NEXT: [[TMP11:%.*]] = add nsw [[C]], [[VP_OP_LOAD1]] -; LOOP-DEL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] -; LOOP-DEL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 -; LOOP-DEL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP11]], ptr align 4 [[TMP13]], splat (i1 true), i32 [[TMP5]]) -; LOOP-DEL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64 -; LOOP-DEL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]] -; LOOP-DEL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; LOOP-DEL-NEXT: br i1 [[TMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; LOOP-DEL: for.body: -; LOOP-DEL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; LOOP-DEL-NEXT: [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; LOOP-DEL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] -; LOOP-DEL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 -; LOOP-DEL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; LOOP-DEL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; LOOP-DEL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; LOOP-DEL: for.cond.cleanup: -; LOOP-DEL-NEXT: ret void -; -entry: - %0 = sub i64 -1, %N - %1 = call i64 @llvm.vscale.i64() - %2 = mul i64 %1, 4 - %3 = icmp ult i64 %0, %2 - br i1 %3, label %scalar.ph, label %vector.ph - -vector.ph: ; preds = %entry - %4 = call i64 @llvm.vscale.i64() - %5 = mul i64 %4, 4 - %6 = call i64 @llvm.vscale.i64() - %7 = mul i64 %6, 4 - %8 = sub i64 %7, 1 - %n.rnd.up = add i64 %N, %8 - %n.mod.vf = urem i64 %n.rnd.up, %5 - %n.vec = sub i64 %n.rnd.up, %n.mod.vf - %9 = call i64 @llvm.vscale.i64() - %10 = mul i64 %9, 4 - br label %vector.body - -vector.body: ; preds = 
%vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ] - %11 = sub i64 %N, %evl.based.iv - %12 = call i32 @llvm.experimental.get.vector.length.i64(i64 %11, i32 4, i1 true) - %13 = add i64 %evl.based.iv, 0 - %14 = getelementptr inbounds i32, ptr %b, i64 %13 - %15 = getelementptr inbounds i32, ptr %14, i32 0 - %vp.op.load = call @llvm.vp.load.nxv4i32.p0(ptr align 4 %15, splat (i1 true), i32 %12) - %18 = add nsw %c, %vp.op.load - %19 = getelementptr inbounds i32, ptr %a, i64 %13 - %20 = getelementptr inbounds i32, ptr %19, i32 0 - call void @llvm.vp.store.nxv4i32.p0( %18, ptr align 4 %20, splat (i1 true), i32 %12) - %21 = zext i32 %12 to i64 - %index.evl.next = add i64 %21, %evl.based.iv - %index.next = add nuw i64 %index, %10 - %22 = icmp eq i64 %index.next, %n.vec - br i1 %22, label %middle.block, label %vector.body, !llvm.loop !0 - -middle.block: ; preds = %vector.body - br i1 true, label %for.cond.cleanup, label %scalar.ph - -scalar.ph: ; preds = %entry, %middle.block - %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ] - br label %for.body - -for.body: ; preds = %for.body, %scalar.ph - %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv - %23 = load i32, ptr %arrayidx, align 4 - %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv - store i32 %23, ptr %arrayidx4, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %N - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !3 - -for.cond.cleanup: ; preds = %middle.block, %for.body - ret void -} - -; Fixed IV steps resulting from vscale_range with a single element - -define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 { -; CHECK-LABEL: define void @fixed_iv_step( -; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { 
-; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[ARG0]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true) -; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]] -; CHECK-NEXT: tail call void @llvm.vp.store.nxv2p0.p0( [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]] -; CHECK: for.end.loopexit5: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -; LOOP-DEL-LABEL: define void @fixed_iv_step( -; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { -; LOOP-DEL-NEXT: entry: -; LOOP-DEL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[ARG0]], i64 0 -; LOOP-DEL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; LOOP-DEL-NEXT: br label [[VECTOR_BODY:%.*]] -; LOOP-DEL: vector.body: -; LOOP-DEL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; LOOP-DEL-NEXT: [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] -; LOOP-DEL-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true) 
-; LOOP-DEL-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]] -; LOOP-DEL-NEXT: tail call void @llvm.vp.store.nxv2p0.p0( [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], splat (i1 true), i32 [[TMP1]]) -; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; LOOP-DEL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]] -; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]] -; LOOP-DEL: for.end: -; LOOP-DEL-NEXT: ret void -; -entry: - br label %vector.ph - -vector.ph: - %n.rnd.up = add nsw i64 %N, 15 - %n.vec = and i64 %n.rnd.up, -16 - %broadcast.splatinsert = insertelement poison, ptr %arg0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - br label %vector.body - -vector.body: - %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ] - %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ] - %41 = sub i64 %N, %evl.based.iv - %42 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %41, i32 2, i1 true) - %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv - tail call void @llvm.vp.store.nxv2p0.p0( %broadcast.splat, ptr align 8 %gep, splat (i1 true), i32 %42) - %43 = zext i32 %42 to i64 - %index.evl.next = add i64 %evl.based.iv, %43 - %lsr.iv.next33 = add i64 %lsr.iv32, -16 - %44 = icmp eq i64 %lsr.iv.next33, 0 - br i1 %44, label %for.end.loopexit5, label %vector.body, !llvm.loop !3 - -for.end.loopexit5: - br label %for.end - -for.end: - ret void -} - -; Fixed IV step and trip count -define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 { -; CHECK-LABEL: define void @fixed_iv_step_tc( -; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[ARG0]], i64 0 -; 
CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true) -; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]] -; CHECK-NEXT: tail call void @llvm.vp.store.nxv2p0.p0( [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87 -; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]] -; CHECK: for.end.loopexit5: -; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -; LOOP-DEL-LABEL: define void @fixed_iv_step_tc( -; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] { -; LOOP-DEL-NEXT: entry: -; LOOP-DEL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[ARG0]], i64 0 -; LOOP-DEL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; LOOP-DEL-NEXT: br label [[VECTOR_BODY:%.*]] -; LOOP-DEL: vector.body: -; LOOP-DEL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; LOOP-DEL-NEXT: [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]] -; LOOP-DEL-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true) -; LOOP-DEL-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]] -; LOOP-DEL-NEXT: tail call void @llvm.vp.store.nxv2p0.p0( [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], splat (i1 true), 
i32 [[TMP1]]) -; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -; LOOP-DEL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]] -; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87 -; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4]] -; LOOP-DEL: for.end: -; LOOP-DEL-NEXT: ret void -; -entry: - br label %vector.ph - -vector.ph: - %n.rnd.up = add nsw i64 87, 15 - %n.vec = and i64 %n.rnd.up, -16 - %broadcast.splatinsert = insertelement poison, ptr %arg0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - br label %vector.body - -vector.body: - %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ] - %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ] - %41 = sub i64 87, %evl.based.iv - %42 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %41, i32 2, i1 true) - %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv - tail call void @llvm.vp.store.nxv2p0.p0( %broadcast.splat, ptr align 8 %gep, splat (i1 true), i32 %42) - %43 = zext i32 %42 to i64 - %index.evl.next = add i64 %evl.based.iv, %43 - %lsr.iv.next33 = add i64 %lsr.iv32, -16 - %44 = icmp eq i64 %lsr.iv.next33, 0 - br i1 %44, label %for.end.loopexit5, label %vector.body, !llvm.loop !3 - -for.end.loopexit5: - br label %for.end - -for.end: - ret void -} - -declare i64 @llvm.vscale.i64() - -declare i32 @llvm.experimental.get.vector.length.i64(i64, i32 immarg, i1 immarg) - -declare @llvm.vp.load.nxv4i32.p0(ptr nocapture, , i32) - -declare void @llvm.vp.store.nxv4i32.p0(, ptr nocapture, , i32) - -attributes #0 = { vscale_range(8,8) } - -!0 = distinct !{!0, !1, !2, !4} -!1 = !{!"llvm.loop.isvectorized", i32 1} -!2 = !{!"llvm.loop.unroll.runtime.disable"} -!3 = distinct !{!3, !2, !1, !4} -!4 = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"} -;. 
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[META3]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]], [[META3]]} -;. -; LOOP-DEL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]], [[META3:![0-9]+]]} -; LOOP-DEL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; LOOP-DEL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; LOOP-DEL: [[META3]] = !{!"llvm.loop.isvectorized.tailfoldingstyle", !"evl"} -; LOOP-DEL: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]], [[META3]]} -;. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll b/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll index f663d120b136a..9cfafd2784488 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/long-pointer-distance.ll @@ -5,13 +5,7 @@ define void @test(ptr %this) { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ptr [[THIS:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: store i64 1, ptr [[THIS]], align 8 -; CHECK-NEXT: [[B:%.*]] = getelementptr i8, ptr [[THIS]], i64 8 -; CHECK-NEXT: store i64 2, ptr [[B]], align 8 -; CHECK-NEXT: [[C:%.*]] = getelementptr i8, ptr [[THIS]], i64 4294967312 -; CHECK-NEXT: store i64 3, ptr [[C]], align 8 -; CHECK-NEXT: [[D:%.*]] = getelementptr i8, ptr [[THIS]], i64 4294967320 -; CHECK-NEXT: store i64 4, ptr [[D]], align 8 +; CHECK-NEXT: store <4 x i64> , ptr [[THIS]], align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-operand-gathered-loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-operand-gathered-loads.ll deleted file mode 100644 index 57eb1e7173618..0000000000000 --- 
a/llvm/test/Transforms/SLPVectorizer/X86/phi-operand-gathered-loads.ll +++ /dev/null @@ -1,53 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=cascadelake < %s | FileCheck %s - -%class.btManifoldPoint = type <{ %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float, float, float, i32, i32, i32, i32, [4 x i8], ptr, float, i8, [3 x i8], float, float, i32, %class.btVector3, %class.btVector3, [4 x i8] }> -%class.btVector3 = type { [4 x float] } - -define void @test(ptr %this, i1 %cmp4.not) { -; CHECK-LABEL: define void @test( -; CHECK-SAME: ptr [[THIS:%.*]], i1 [[CMP4_NOT:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[NEWPT:%.*]] = alloca [[CLASS_BTMANIFOLDPOINT:%.*]], align 8 -; CHECK-NEXT: [[CALL25:%.*]] = load volatile i32, ptr [[NEWPT]], align 4 -; CHECK-NEXT: br i1 [[CMP4_NOT]], label %[[IF_ELSE37:.*]], label %[[IF_END46:.*]] -; CHECK: [[IF_ELSE37]]: -; CHECK-NEXT: br label %[[IF_END46]] -; CHECK: [[IF_END46]]: -; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i64> [ , %[[IF_ELSE37]] ], [ , %[[ENTRY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[THIS]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[TMP1]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <4 x ptr> [[TMP2]], <4 x i64> [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEWPT]], i64 92 -; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) -; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP4]], align 4 -; CHECK-NEXT: ret void -; -entry: - %newPt = alloca %class.btManifoldPoint, align 8 - %call25 = load volatile i32, ptr %newPt, align 4 - br i1 %cmp4.not, label %if.else37, label %if.end46 - -if.else37: ; preds = %entry - br label 
%if.end46 - -if.end46: ; preds = %if.else37, %entry - %.sink264 = phi i64 [ 160, %if.else37 ], [ 0, %entry ] - %.sink262 = phi i64 [ 0, %if.else37 ], [ 1, %entry ] - %.sink261 = phi i64 [ 1, %if.else37 ], [ 0, %entry ] - %m_partId038 = getelementptr i8, ptr %this, i64 %.sink264 - %m_index042 = getelementptr i8, ptr %this, i64 %.sink262 - %m_index144 = getelementptr i8, ptr %this, i64 %.sink261 - %.sink = load i32, ptr %m_index144, align 4 - %.sink186 = load i32, ptr %m_index042, align 4 - %.sink188 = load i32, ptr %m_partId038, align 4 - %0 = getelementptr i8, ptr %newPt, i64 92 - store i32 %.sink188, ptr %0, align 4 - %1 = getelementptr i8, ptr %newPt, i64 96 - store i32 %.sink, ptr %1, align 8 - %2 = getelementptr i8, ptr %newPt, i64 100 - store i32 %.sink186, ptr %2, align 4 - %3 = getelementptr i8, ptr %newPt, i64 104 - store i32 %.sink, ptr %3, align 8 - ret void -} diff --git a/llvm/test/Transforms/SafeStack/AArch64/abi.ll b/llvm/test/Transforms/SafeStack/AArch64/abi.ll index 70e1ba605c1d6..6d4ca03096825 100644 --- a/llvm/test/Transforms/SafeStack/AArch64/abi.ll +++ b/llvm/test/Transforms/SafeStack/AArch64/abi.ll @@ -4,7 +4,7 @@ define void @foo() nounwind uwtable safestack { entry: -; CHECK: %[[TP:.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK: %[[TP:.*]] = call ptr @llvm.thread.pointer() ; CHECK: %[[SPA0:.*]] = getelementptr i8, ptr %[[TP]], i32 72 ; CHECK: %[[USP:.*]] = load ptr, ptr %[[SPA0]] ; CHECK: %[[USST:.*]] = getelementptr i8, ptr %[[USP]], i32 -16 diff --git a/llvm/test/Transforms/SafeStack/AArch64/abi_ssp.ll b/llvm/test/Transforms/SafeStack/AArch64/abi_ssp.ll index 43fb2605ff646..282d8c4390b65 100644 --- a/llvm/test/Transforms/SafeStack/AArch64/abi_ssp.ll +++ b/llvm/test/Transforms/SafeStack/AArch64/abi_ssp.ll @@ -6,9 +6,9 @@ define void @foo() nounwind uwtable safestack sspreq { entry: ; The first @llvm.thread.pointer is for the unsafe stack pointer, skip it. 
-; TLS: call ptr @llvm.thread.pointer.p0() +; TLS: call ptr @llvm.thread.pointer() -; TLS: %[[TP2:.*]] = call ptr @llvm.thread.pointer.p0() +; TLS: %[[TP2:.*]] = call ptr @llvm.thread.pointer() ; ANDROID: %[[B:.*]] = getelementptr i8, ptr %[[TP2]], i32 40 ; FUCHSIA: %[[B:.*]] = getelementptr i8, ptr %[[TP2]], i32 -16 ; TLS: %[[StackGuard:.*]] = load ptr, ptr %[[B]] diff --git a/llvm/test/Transforms/SafeStack/AArch64/unreachable.ll b/llvm/test/Transforms/SafeStack/AArch64/unreachable.ll index befdc634b73e5..23fd3bf9d8f21 100644 --- a/llvm/test/Transforms/SafeStack/AArch64/unreachable.ll +++ b/llvm/test/Transforms/SafeStack/AArch64/unreachable.ll @@ -3,7 +3,7 @@ define void @foo() nounwind uwtable safestack { entry: -; CHECK: %[[TP:.*]] = call ptr @llvm.thread.pointer.p0() +; CHECK: %[[TP:.*]] = call ptr @llvm.thread.pointer() ; CHECK: %[[SPA0:.*]] = getelementptr i8, ptr %[[TP]], i32 72 ; CHECK: %[[USP:.*]] = load ptr, ptr %[[SPA0]] ; CHECK: %[[USST:.*]] = getelementptr i8, ptr %[[USP]], i32 -16 diff --git a/llvm/test/tools/llvm-exegesis/AArch64/skip_unsupported_instructions.s b/llvm/test/tools/llvm-exegesis/AArch64/skip_unsupported_instructions.s index 72009756ed1d5..927ee190e803f 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/skip_unsupported_instructions.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/skip_unsupported_instructions.s @@ -1,9 +1,5 @@ llvm/test/tools/llvm-exegesis/AArch64/skip_unsupported_instructions.s -# TODO: This is failing on some systems that have hardware support for -# pointer authentication. This needs to be fixed before reenabling. 
-# REQUIRES: disabled - # REQUIRES: aarch64-registered-target # Check for skipping of illegal instruction errors (AUT and LDGM) @@ -11,4 +7,4 @@ llvm/test/tools/llvm-exegesis/AArch64/skip_unsupported_instructions.s # CHECK-AUTIA-NOT: snippet crashed while running: Illegal instruction # RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --opcode-name=LDGM --benchmark-phase=assemble-measured-code 2>&1 | FileCheck %s --check-prefix=CHECK-LDGM -# CHECK-LDGM: LDGM: Unsupported opcode: load tag multiple +# CHECK-LDGM: LDGM: Unsupported opcode: load tag multiple \ No newline at end of file diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 7a778da2d3a49..2f839199712eb 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1375,7 +1375,7 @@ static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index, } static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, - ArrayRef Bytes, raw_ostream &OS) { + ArrayRef Bytes) { // print out data up to 8 bytes at a time in hex and ascii uint8_t AsciiData[9] = {'\0'}; uint8_t Byte; @@ -1383,9 +1383,9 @@ static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, for (; Index < End; ++Index) { if (NumBytes == 0) - OS << format("%8" PRIx64 ":", SectionAddr + Index); + outs() << format("%8" PRIx64 ":", SectionAddr + Index); Byte = Bytes.slice(Index)[0]; - OS << format(" %02x", Byte); + outs() << format(" %02x", Byte); AsciiData[NumBytes] = isPrint(Byte) ? 
Byte : '.'; uint8_t IndentOffset = 0; @@ -1400,9 +1400,9 @@ static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, } if (NumBytes == 8) { AsciiData[8] = '\0'; - OS << std::string(IndentOffset, ' ') << " "; - OS << reinterpret_cast(AsciiData); - OS << '\n'; + outs() << std::string(IndentOffset, ' ') << " "; + outs() << reinterpret_cast(AsciiData); + outs() << '\n'; NumBytes = 0; } } @@ -1666,7 +1666,7 @@ static void disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, DisassemblerTarget &PrimaryTarget, std::optional &SecondaryTarget, - SourcePrinter &SP, bool InlineRelocs, raw_ostream &OS) { + SourcePrinter &SP, bool InlineRelocs) { DisassemblerTarget *DT = &PrimaryTarget; bool PrimaryIsThumb = false; SmallVector, 0> CHPECodeMap; @@ -2089,10 +2089,10 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, if (!PrintedSection) { PrintedSection = true; - OS << "\nDisassembly of section "; + outs() << "\nDisassembly of section "; if (!SegmentName.empty()) - OS << SegmentName << ","; - OS << SectionName << ":\n"; + outs() << SegmentName << ","; + outs() << SectionName << ":\n"; } bool PrintedLabel = false; @@ -2104,22 +2104,22 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, const StringRef SymbolName = SymNamesHere[i]; if (!PrintedLabel) { - OS << '\n'; + outs() << '\n'; PrintedLabel = true; } if (LeadingAddr) - OS << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", - SectionAddr + Start + VMAAdjustment); + outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", + SectionAddr + Start + VMAAdjustment); if (Obj.isXCOFF() && SymbolDescription) { - OS << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n"; + outs() << getXCOFFSymbolDescription(Symbol, SymbolName) << ":\n"; } else - OS << '<' << SymbolName << ">:\n"; + outs() << '<' << SymbolName << ">:\n"; } // Don't print raw contents of a virtual section. A virtual section // doesn't have any contents in the file. 
if (Section.isVirtual()) { - OS << "...\n"; + outs() << "...\n"; continue; } @@ -2156,17 +2156,17 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, do { StringRef Line; std::tie(Line, ErrMsg) = ErrMsg.split('\n'); - OS << DT->Context->getAsmInfo()->getCommentString() - << " error decoding " << SymNamesHere[SHI] << ": " << Line - << '\n'; + outs() << DT->Context->getAsmInfo()->getCommentString() + << " error decoding " << SymNamesHere[SHI] << ": " << Line + << '\n'; } while (!ErrMsg.empty()); if (Size) { - OS << DT->Context->getAsmInfo()->getCommentString() - << " decoding failed region as bytes\n"; + outs() << DT->Context->getAsmInfo()->getCommentString() + << " decoding failed region as bytes\n"; for (uint64_t I = 0; I < Size; ++I) - OS << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) - << '\n'; + outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) + << '\n'; } } @@ -2179,13 +2179,13 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, Start += Size; break; } - formatted_raw_ostream FOS(OS); + Index = Start; if (SectionAddr < StartAddress) Index = std::max(Index, StartAddress - SectionAddr); if (DisassembleAsELFData) { - dumpELFData(SectionAddr, Index, End, Bytes, FOS); + dumpELFData(SectionAddr, Index, End, Bytes); Index = End; continue; } @@ -2203,6 +2203,8 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, Symbols[SI - 1].XCOFFSymInfo.StorageMappingClass && (*Symbols[SI - 1].XCOFFSymInfo.StorageMappingClass == XCOFF::XMC_PR); + formatted_raw_ostream FOS(outs()); + std::unordered_map AllLabels; std::unordered_map> BBAddrMapLabels; if (SymbolizeOperands) { @@ -2551,8 +2553,7 @@ disassembleObject(ObjectFile &Obj, const ObjectFile &DbgObj, reportWarning("failed to disassemble missing symbol " + Sym, FileName); } -static void disassembleObject(ObjectFile *Obj, bool InlineRelocs, - raw_ostream &OS) { +static void disassembleObject(ObjectFile *Obj, bool InlineRelocs) { // If information useful for showing 
the disassembly is missing, try to find a // more complete binary and disassemble that instead. OwningBinary FetchedBinary; @@ -2678,7 +2679,7 @@ static void disassembleObject(ObjectFile *Obj, bool InlineRelocs, "Unrecognized disassembler option: " + Opt); disassembleObject(*Obj, *DbgObj, PrimaryTarget, SecondaryTarget, SP, - InlineRelocs, OS); + InlineRelocs); } void Dumper::printRelocations() { @@ -3339,7 +3340,7 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, if (SectionContents) printSectionContents(O); if (Disassemble) - disassembleObject(O, Relocations, outs()); + disassembleObject(O, Relocations); if (UnwindInfo) printUnwindInfo(O); diff --git a/llvm/unittests/ADT/BitmaskEnumTest.cpp b/llvm/unittests/ADT/BitmaskEnumTest.cpp index b1ef8482416a9..2c0a80342a54c 100644 --- a/llvm/unittests/ADT/BitmaskEnumTest.cpp +++ b/llvm/unittests/ADT/BitmaskEnumTest.cpp @@ -176,17 +176,6 @@ TEST(BitmaskEnumTest, BitwiseNot) { EXPECT_EQ(15, ~V0); } -TEST(BitmaskEnumTest, BooleanNot) { - bool b0 = !F0; - EXPECT_TRUE(b0); - - bool b1 = !(F1 & F2); - EXPECT_TRUE(b1); - - bool b2 = !(F2 | F4); - EXPECT_FALSE(b2); -} - enum class FlagsClass { F0 = 0, F1 = 1, diff --git a/llvm/unittests/Support/FileOutputBufferTest.cpp b/llvm/unittests/Support/FileOutputBufferTest.cpp index 423a6e12240c0..f7bb0833e5a0e 100644 --- a/llvm/unittests/Support/FileOutputBufferTest.cpp +++ b/llvm/unittests/Support/FileOutputBufferTest.cpp @@ -123,7 +123,7 @@ TEST(FileOutputBuffer, Test) { File5.append("/file5"); { Expected> BufferOrErr = - FileOutputBuffer::create(File5, 8000, FileOutputBuffer::F_mmap); + FileOutputBuffer::create(File5, 8000, FileOutputBuffer::F_no_mmap); ASSERT_NO_ERROR(errorToErrorCode(BufferOrErr.takeError())); std::unique_ptr &Buffer = *BufferOrErr; // Start buffer with special header. 
diff --git a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp index f13252f3a4c28..f8c77fcba19cf 100644 --- a/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp +++ b/llvm/unittests/Target/AArch64/SMEAttributesTest.cpp @@ -9,7 +9,6 @@ using namespace llvm; using SA = SMEAttrs; -using CA = SMECallAttrs; std::unique_ptr parseIR(const char *IR) { static LLVMContext C; @@ -71,14 +70,15 @@ TEST(SMEAttributes, Constructors) { ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_new_zt0\"") ->getFunction("foo")) .isNewZT0()); - - auto CallModule = parseIR("declare void @callee()\n" - "define void @foo() {" - "call void @callee() \"aarch64_zt0_undef\"\n" - "ret void\n}"); - CallBase &Call = - cast((CallModule->getFunction("foo")->begin()->front())); - ASSERT_TRUE(SMECallAttrs(Call).callsite().hasUndefZT0()); + ASSERT_TRUE( + SA(cast((parseIR("declare void @callee()\n" + "define void @foo() {" + "call void @callee() \"aarch64_zt0_undef\"\n" + "ret void\n}") + ->getFunction("foo") + ->begin() + ->front()))) + .isUndefZT0()); // Invalid combinations. 
EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled | SA::SM_Compatible), @@ -235,7 +235,7 @@ TEST(SMEAttributes, Basics) { ASSERT_TRUE(ZT0_Undef.hasZT0State()); ASSERT_FALSE(ZT0_Undef.hasSharedZAInterface()); ASSERT_TRUE(ZT0_Undef.hasPrivateZAInterface()); - ASSERT_TRUE(ZT0_Undef.hasUndefZT0()); + ASSERT_TRUE(ZT0_Undef.isUndefZT0()); ASSERT_FALSE(SA(SA::Normal).isInZT0()); ASSERT_FALSE(SA(SA::Normal).isOutZT0()); @@ -248,57 +248,59 @@ TEST(SMEAttributes, Basics) { TEST(SMEAttributes, Transitions) { // Normal -> Normal - ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresSMChange()); - ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresPreservingZT0()); - ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresDisablingZABeforeCall()); - ASSERT_FALSE(CA(SA::Normal, SA::Normal).requiresEnablingZAAfterCall()); + ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::Normal))); + ASSERT_FALSE(SA(SA::Normal).requiresPreservingZT0(SA(SA::Normal))); + ASSERT_FALSE(SA(SA::Normal).requiresDisablingZABeforeCall(SA(SA::Normal))); + ASSERT_FALSE(SA(SA::Normal).requiresEnablingZAAfterCall(SA(SA::Normal))); // Normal -> Normal + LocallyStreaming - ASSERT_FALSE(CA(SA::Normal, SA::Normal | SA::SM_Body).requiresSMChange()); + ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::Normal | SA::SM_Body))); // Normal -> Streaming - ASSERT_TRUE(CA(SA::Normal, SA::SM_Enabled).requiresSMChange()); + ASSERT_TRUE(SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled))); // Normal -> Streaming + LocallyStreaming - ASSERT_TRUE(CA(SA::Normal, SA::SM_Enabled | SA::SM_Body).requiresSMChange()); + ASSERT_TRUE( + SA(SA::Normal).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body))); // Normal -> Streaming-compatible - ASSERT_FALSE(CA(SA::Normal, SA::SM_Compatible).requiresSMChange()); + ASSERT_FALSE(SA(SA::Normal).requiresSMChange(SA(SA::SM_Compatible))); // Normal -> Streaming-compatible + LocallyStreaming ASSERT_FALSE( - CA(SA::Normal, SA::SM_Compatible | SA::SM_Body).requiresSMChange()); + 
SA(SA::Normal).requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body))); // Streaming -> Normal - ASSERT_TRUE(CA(SA::SM_Enabled, SA::Normal).requiresSMChange()); + ASSERT_TRUE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal))); // Streaming -> Normal + LocallyStreaming - ASSERT_TRUE(CA(SA::SM_Enabled, SA::Normal | SA::SM_Body).requiresSMChange()); + ASSERT_TRUE( + SA(SA::SM_Enabled).requiresSMChange(SA(SA::Normal | SA::SM_Body))); // Streaming -> Streaming - ASSERT_FALSE(CA(SA::SM_Enabled, SA::SM_Enabled).requiresSMChange()); + ASSERT_FALSE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Enabled))); // Streaming -> Streaming + LocallyStreaming ASSERT_FALSE( - CA(SA::SM_Enabled, SA::SM_Enabled | SA::SM_Body).requiresSMChange()); + SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body))); // Streaming -> Streaming-compatible - ASSERT_FALSE(CA(SA::SM_Enabled, SA::SM_Compatible).requiresSMChange()); + ASSERT_FALSE(SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Compatible))); // Streaming -> Streaming-compatible + LocallyStreaming ASSERT_FALSE( - CA(SA::SM_Enabled, SA::SM_Compatible | SA::SM_Body).requiresSMChange()); + SA(SA::SM_Enabled).requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body))); // Streaming-compatible -> Normal - ASSERT_TRUE(CA(SA::SM_Compatible, SA::Normal).requiresSMChange()); + ASSERT_TRUE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal))); ASSERT_TRUE( - CA(SA::SM_Compatible, SA::Normal | SA::SM_Body).requiresSMChange()); + SA(SA::SM_Compatible).requiresSMChange(SA(SA::Normal | SA::SM_Body))); // Streaming-compatible -> Streaming - ASSERT_TRUE(CA(SA::SM_Compatible, SA::SM_Enabled).requiresSMChange()); + ASSERT_TRUE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled))); // Streaming-compatible -> Streaming + LocallyStreaming ASSERT_TRUE( - CA(SA::SM_Compatible, SA::SM_Enabled | SA::SM_Body).requiresSMChange()); + SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Enabled | SA::SM_Body))); // Streaming-compatible -> 
Streaming-compatible - ASSERT_FALSE(CA(SA::SM_Compatible, SA::SM_Compatible).requiresSMChange()); + ASSERT_FALSE(SA(SA::SM_Compatible).requiresSMChange(SA(SA::SM_Compatible))); // Streaming-compatible -> Streaming-compatible + LocallyStreaming - ASSERT_FALSE(CA(SA::SM_Compatible, SA::SM_Compatible | SA::SM_Body) - .requiresSMChange()); + ASSERT_FALSE(SA(SA::SM_Compatible) + .requiresSMChange(SA(SA::SM_Compatible | SA::SM_Body))); SA Private_ZA = SA(SA::Normal); SA ZA_Shared = SA(SA::encodeZAState(SA::StateValue::In)); @@ -308,39 +310,37 @@ TEST(SMEAttributes, Transitions) { SA Undef_ZT0 = SA(SA::ZT0_Undef); // Shared ZA -> Private ZA Interface - ASSERT_FALSE(CA(ZA_Shared, Private_ZA).requiresDisablingZABeforeCall()); - ASSERT_TRUE(CA(ZA_Shared, Private_ZA).requiresEnablingZAAfterCall()); + ASSERT_FALSE(ZA_Shared.requiresDisablingZABeforeCall(Private_ZA)); + ASSERT_TRUE(ZA_Shared.requiresEnablingZAAfterCall(Private_ZA)); // Shared ZT0 -> Private ZA Interface - ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresDisablingZABeforeCall()); - ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresPreservingZT0()); - ASSERT_TRUE(CA(ZT0_Shared, Private_ZA).requiresEnablingZAAfterCall()); + ASSERT_TRUE(ZT0_Shared.requiresDisablingZABeforeCall(Private_ZA)); + ASSERT_TRUE(ZT0_Shared.requiresPreservingZT0(Private_ZA)); + ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Private_ZA)); // Shared Undef ZT0 -> Private ZA Interface // Note: "Undef ZT0" is a callsite attribute that means ZT0 is undefined at // point the of the call. 
- ASSERT_TRUE( - CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresDisablingZABeforeCall()); - ASSERT_FALSE(CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresPreservingZT0()); - ASSERT_TRUE( - CA(ZT0_Shared, Private_ZA, Undef_ZT0).requiresEnablingZAAfterCall()); + ASSERT_TRUE(ZT0_Shared.requiresDisablingZABeforeCall(Undef_ZT0)); + ASSERT_FALSE(ZT0_Shared.requiresPreservingZT0(Undef_ZT0)); + ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Undef_ZT0)); // Shared ZA & ZT0 -> Private ZA Interface - ASSERT_FALSE(CA(ZA_ZT0_Shared, Private_ZA).requiresDisablingZABeforeCall()); - ASSERT_TRUE(CA(ZA_ZT0_Shared, Private_ZA).requiresPreservingZT0()); - ASSERT_TRUE(CA(ZA_ZT0_Shared, Private_ZA).requiresEnablingZAAfterCall()); + ASSERT_FALSE(ZA_ZT0_Shared.requiresDisablingZABeforeCall(Private_ZA)); + ASSERT_TRUE(ZA_ZT0_Shared.requiresPreservingZT0(Private_ZA)); + ASSERT_TRUE(ZA_ZT0_Shared.requiresEnablingZAAfterCall(Private_ZA)); // Shared ZA -> Shared ZA Interface - ASSERT_FALSE(CA(ZA_Shared, ZT0_Shared).requiresDisablingZABeforeCall()); - ASSERT_FALSE(CA(ZA_Shared, ZT0_Shared).requiresEnablingZAAfterCall()); + ASSERT_FALSE(ZA_Shared.requiresDisablingZABeforeCall(ZT0_Shared)); + ASSERT_FALSE(ZA_Shared.requiresEnablingZAAfterCall(ZT0_Shared)); // Shared ZT0 -> Shared ZA Interface - ASSERT_FALSE(CA(ZT0_Shared, ZT0_Shared).requiresDisablingZABeforeCall()); - ASSERT_FALSE(CA(ZT0_Shared, ZT0_Shared).requiresPreservingZT0()); - ASSERT_FALSE(CA(ZT0_Shared, ZT0_Shared).requiresEnablingZAAfterCall()); + ASSERT_FALSE(ZT0_Shared.requiresDisablingZABeforeCall(ZT0_Shared)); + ASSERT_FALSE(ZT0_Shared.requiresPreservingZT0(ZT0_Shared)); + ASSERT_FALSE(ZT0_Shared.requiresEnablingZAAfterCall(ZT0_Shared)); // Shared ZA & ZT0 -> Shared ZA Interface - ASSERT_FALSE(CA(ZA_ZT0_Shared, ZT0_Shared).requiresDisablingZABeforeCall()); - ASSERT_FALSE(CA(ZA_ZT0_Shared, ZT0_Shared).requiresPreservingZT0()); - ASSERT_FALSE(CA(ZA_ZT0_Shared, ZT0_Shared).requiresEnablingZAAfterCall()); + 
ASSERT_FALSE(ZA_ZT0_Shared.requiresDisablingZABeforeCall(ZT0_Shared)); + ASSERT_FALSE(ZA_ZT0_Shared.requiresPreservingZT0(ZT0_Shared)); + ASSERT_FALSE(ZA_ZT0_Shared.requiresEnablingZAAfterCall(ZT0_Shared)); } diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index fc6854a483f6f..2a53f8469b8fa 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -981,7 +981,6 @@ R"(All available -march extensions for RISC-V a 2.1 f 2.2 d 2.2 - q 2.2 c 2.0 b 1.0 v 1.0 @@ -1129,7 +1128,6 @@ R"(All available -march extensions for RISC-V svpbmt 1.0 svvptc 1.0 xandesperf 5.0 - xandesvdot 5.0 xandesvpackfph 5.0 xcvalu 1.0 xcvbi 1.0 diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index f0d943fe8f304..eec7b4480b75d 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1247,9 +1247,8 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { { // Test for a call to a function without side-effects. 
Module M("", C); - PointerType *PtrTy = PointerType::get(C, 0); Function *TheFn = - Intrinsic::getOrInsertDeclaration(&M, Intrinsic::thread_pointer, PtrTy); + Intrinsic::getOrInsertDeclaration(&M, Intrinsic::thread_pointer); auto *Call = CallInst::Create(TheFn->getFunctionType(), TheFn); VPValue *Op1 = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1)); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index 0cce111ccd22c..84b7e33146811 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -143,44 +143,6 @@ TEST_F(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) { delete Phi; } -TEST_F(VPVerifierTest, VPPhiIncomingValueDoesntDominateIncomingBlock) { - VPlan &Plan = getPlan(); - IntegerType *Int32 = IntegerType::get(C, 32); - VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 0)); - - VPBasicBlock *VPBB1 = Plan.getEntry(); - VPBasicBlock *VPBB2 = Plan.createVPBasicBlock(""); - VPBasicBlock *VPBB3 = Plan.createVPBasicBlock(""); - - VPInstruction *DefI = new VPInstruction(Instruction::Add, {Zero}); - VPPhi *Phi = new VPPhi({DefI}, {}); - VPBB2->appendRecipe(Phi); - VPBB2->appendRecipe(DefI); - auto *CanIV = new VPCanonicalIVPHIRecipe(Zero, {}); - VPBB3->appendRecipe(CanIV); - - VPRegionBlock *R1 = Plan.createVPRegionBlock(VPBB3, VPBB3, "R1"); - VPBlockUtils::connectBlocks(VPBB1, VPBB2); - VPBlockUtils::connectBlocks(VPBB2, R1); -#if GTEST_HAS_STREAM_REDIRECTION - ::testing::internal::CaptureStderr(); -#endif - EXPECT_FALSE(verifyVPlanIsValid(Plan)); -#if GTEST_HAS_STREAM_REDIRECTION -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - EXPECT_STREQ("Incoming def at index 0 does not dominate incoming block!\n" - " EMIT vp<%2> = add ir<0>\n" - " does not dominate preheader for\n" - " EMIT vp<%1> = phi [ vp<%2>, preheader ]", - ::testing::internal::GetCapturedStderr().c_str()); -#else - 
EXPECT_STREQ("Incoming def at index 0 does not dominate incoming block!\n", :: - testing::internal::GetCapturedStderr() - .c_str()); -#endif -#endif -} - TEST_F(VPVerifierTest, DuplicateSuccessorsOutsideRegion) { VPlan &Plan = getPlan(); VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(Type::getInt32Ty(C), 0)); diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index df37d7005215e..339b8d6acd622 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -77,48 +77,6 @@ static void generateEnumClass(ArrayRef Records, raw_ostream &OS, } } -// Generate enum class with values corresponding to different bit positions. -// Entries are emitted in the order in which they appear in the `Records` -// vector. -static void generateEnumBitmask(ArrayRef Records, - raw_ostream &OS, StringRef Enum, - StringRef Prefix, - const DirectiveLanguage &DirLang, - bool ExportEnums) { - assert(Records.size() <= 64 && "Too many values for a bitmask"); - llvm::StringRef Type = Records.size() <= 32 ? "uint32_t" : "uint64_t"; - llvm::StringRef TypeSuffix = Records.size() <= 32 ? "U" : "ULL"; - - OS << "\n"; - OS << "enum class " << Enum << " : " << Type << " {\n"; - std::string LastName; - for (auto [I, R] : llvm::enumerate(Records)) { - BaseRecord Rec(R); - LastName = Prefix.str() + Rec.getFormattedName(); - OS << " " << LastName << " = " << (1ull << I) << TypeSuffix << ",\n"; - } - OS << " LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/" << LastName << ")\n"; - OS << "};\n"; - OS << "\n"; - OS << "static constexpr std::size_t " << Enum - << "_enumSize = " << Records.size() << ";\n"; - - // Make the enum values available in the defined namespace. This allows us to - // write something like Enum_X if we have a `using namespace `. 
- // At the same time we do not loose the strong type guarantees of the enum - // class, that is we cannot pass an unsigned as Directive without an explicit - // cast. - if (ExportEnums) { - OS << "\n"; - for (const auto &R : Records) { - BaseRecord Rec(R); - OS << "constexpr auto " << Prefix << Rec.getFormattedName() << " = " - << "llvm::" << DirLang.getCppNamespace() << "::" << Enum - << "::" << Prefix << Rec.getFormattedName() << ";\n"; - } - } -} - // Generate enums for values that clauses can take. // Also generate function declarations for getName(StringRef Str). static void generateEnumClauseVal(ArrayRef Records, @@ -266,9 +224,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { generateEnumClass(DirLang.getCategories(), OS, "Category", /*Prefix=*/"", DirLang, /*ExportEnums=*/false); - generateEnumBitmask(DirLang.getSourceLanguages(), OS, "SourceLanguage", - /*Prefix=*/"", DirLang, /*ExportEnums=*/false); - // Emit Directive enumeration generateEnumClass(DirLang.getDirectives(), OS, "Directive", DirLang.getDirectivePrefix(), DirLang, @@ -312,7 +267,6 @@ static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { << getMaxLeafCount(DirLang) << "; }\n"; OS << "LLVM_ABI Association getDirectiveAssociation(Directive D);\n"; OS << "LLVM_ABI Category getDirectiveCategory(Directive D);\n"; - OS << "LLVM_ABI SourceLanguage getDirectiveLanguages(Directive D);\n"; if (EnumHelperFuncs.length() > 0) { OS << EnumHelperFuncs; OS << "\n"; @@ -810,34 +764,6 @@ static void generateGetDirectiveCategory(const DirectiveLanguage &DirLang, OS << "}\n"; } -static void generateGetDirectiveLanguages(const DirectiveLanguage &DirLang, - raw_ostream &OS) { - std::string LangNamespace = "llvm::" + DirLang.getCppNamespace().str(); - std::string LanguageTypeName = LangNamespace + "::SourceLanguage"; - std::string LanguageNamespace = LanguageTypeName + "::"; - - OS << '\n'; - OS << LanguageTypeName << ' ' << LangNamespace << 
"::getDirectiveLanguages(" - << getDirectiveType(DirLang) << " D) {\n"; - OS << " switch (D) {\n"; - - for (const Record *R : DirLang.getDirectives()) { - Directive D(R); - OS << " case " << getDirectiveName(DirLang, R) << ":\n"; - OS << " return "; - llvm::interleave( - D.getSourceLanguages(), OS, - [&](const Record *L) { - OS << LanguageNamespace << BaseRecord::getFormattedName(L); - }, - " | "); - OS << ";\n"; - } - OS << " } // switch(D)\n"; - OS << " llvm_unreachable(\"Unexpected directive\");\n"; - OS << "}\n"; -} - namespace { enum class DirectiveClauseFE { Flang, Clang }; @@ -1338,9 +1264,6 @@ void emitDirectivesBasicImpl(const DirectiveLanguage &DirLang, // getDirectiveCategory(Directive D) generateGetDirectiveCategory(DirLang, OS); - // getDirectiveLanguages(Directive D) - generateGetDirectiveLanguages(DirLang, OS); - // Leaf table for getLeafConstructs, etc. emitLeafTable(DirLang, OS, "LeafConstructTable"); } diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index a8b6f79c176a7..9aa6ec1064276 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -35,7 +35,7 @@ using namespace llvm; /// namespace { struct InstructionMemo { - StringRef Name; + std::string Name; const CodeGenRegisterClass *RC; std::string SubRegNo; std::vector PhysRegs; @@ -71,7 +71,10 @@ class ImmPredicateSet { return Entry - 1; } - const TreePredicateFn &getPredicate(unsigned Idx) { return PredsByName[Idx]; } + const TreePredicateFn &getPredicate(unsigned i) { + assert(i < PredsByName.size()); + return PredsByName[i]; + } typedef std::vector::const_iterator iterator; iterator begin() const { return PredsByName.begin(); } @@ -148,33 +151,37 @@ struct OperandsSignature { bool empty() const { return Operands.empty(); } bool hasAnyImmediateCodes() const { - return llvm::any_of(Operands, [](OpKind Kind) { - return Kind.isImm() && Kind.getImmCode() != 0; - }); + for (unsigned i = 0, e = Operands.size(); 
i != e; ++i) + if (Operands[i].isImm() && Operands[i].getImmCode() != 0) + return true; + return false; } /// getWithoutImmCodes - Return a copy of this with any immediate codes forced /// to zero. OperandsSignature getWithoutImmCodes() const { OperandsSignature Result; - Result.Operands.resize(Operands.size()); - llvm::transform(Operands, Result.Operands.begin(), [](OpKind Kind) { - return Kind.isImm() ? OpKind::getImm(0) : Kind; - }); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + if (!Operands[i].isImm()) + Result.Operands.push_back(Operands[i]); + else + Result.Operands.push_back(OpKind::getImm(0)); return Result; } - void emitImmediatePredicate(raw_ostream &OS, - ImmPredicateSet &ImmPredicates) const { - ListSeparator LS(" &&\n "); - for (auto [Idx, Opnd] : enumerate(Operands)) { - if (!Opnd.isImm()) + void emitImmediatePredicate(raw_ostream &OS, ImmPredicateSet &ImmPredicates) { + bool EmittedAnything = false; + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (!Operands[i].isImm()) continue; - unsigned Code = Opnd.getImmCode(); + unsigned Code = Operands[i].getImmCode(); if (Code == 0) continue; + if (EmittedAnything) + OS << " &&\n "; + TreePredicateFn PredFn = ImmPredicates.getPredicate(Code - 1); // Emit the type check. 
@@ -182,9 +189,10 @@ struct OperandsSignature { ValueTypeByHwMode VVT = TP->getTree(0)->getType(0); assert(VVT.isSimple() && "Cannot use variable value types with fast isel"); - OS << LS << "VT == " << getEnumName(VVT.getSimple().SimpleTy) << " && "; + OS << "VT == " << getEnumName(VVT.getSimple().SimpleTy) << " && "; - OS << PredFn.getFnName() << "(imm" << Idx << ')'; + OS << PredFn.getFnName() << "(imm" << i << ')'; + EmittedAnything = true; } } @@ -296,74 +304,77 @@ struct OperandsSignature { void PrintParameters(raw_ostream &OS) const { ListSeparator LS; - for (auto [Idx, Opnd] : enumerate(Operands)) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { OS << LS; - if (Opnd.isReg()) - OS << "Register Op" << Idx; - else if (Opnd.isImm()) - OS << "uint64_t imm" << Idx; - else if (Opnd.isFP()) - OS << "const ConstantFP *f" << Idx; - else + if (Operands[i].isReg()) { + OS << "Register Op" << i; + } else if (Operands[i].isImm()) { + OS << "uint64_t imm" << i; + } else if (Operands[i].isFP()) { + OS << "const ConstantFP *f" << i; + } else { llvm_unreachable("Unknown operand kind!"); + } } } - void PrintArguments(raw_ostream &OS, ArrayRef PhyRegs) const { + void PrintArguments(raw_ostream &OS, + const std::vector &PR) const { + assert(PR.size() == Operands.size()); ListSeparator LS; - for (auto [Idx, Opnd, PhyReg] : enumerate(Operands, PhyRegs)) { - if (!PhyReg.empty()) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (PR[i] != "") // Implicit physical register operand. 
continue; - } OS << LS; - if (Opnd.isReg()) - OS << "Op" << Idx; - else if (Opnd.isImm()) - OS << "imm" << Idx; - else if (Opnd.isFP()) - OS << "f" << Idx; - else + if (Operands[i].isReg()) { + OS << "Op" << i; + } else if (Operands[i].isImm()) { + OS << "imm" << i; + } else if (Operands[i].isFP()) { + OS << "f" << i; + } else { llvm_unreachable("Unknown operand kind!"); + } } } void PrintArguments(raw_ostream &OS) const { ListSeparator LS; - for (auto [Idx, Opnd] : enumerate(Operands)) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { OS << LS; - if (Opnd.isReg()) - OS << "Op" << Idx; - else if (Opnd.isImm()) - OS << "imm" << Idx; - else if (Opnd.isFP()) - OS << "f" << Idx; - else + if (Operands[i].isReg()) { + OS << "Op" << i; + } else if (Operands[i].isImm()) { + OS << "imm" << i; + } else if (Operands[i].isFP()) { + OS << "f" << i; + } else { llvm_unreachable("Unknown operand kind!"); + } } } - void PrintManglingSuffix(raw_ostream &OS, ArrayRef PhyRegs, + void PrintManglingSuffix(raw_ostream &OS, const std::vector &PR, ImmPredicateSet &ImmPredicates, bool StripImmCodes = false) const { - for (auto [PhyReg, Opnd] : zip_equal(PhyRegs, Operands)) { - if (!PhyReg.empty()) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (PR[i] != "") // Implicit physical register operand. e.g. Instruction::Mul expect to // select to a binary op. On x86, mul may take a single operand with // the other operand being implicit. We must emit something that looks // like a binary instruction except for the very inner fastEmitInst_* // call. 
continue; - } - Opnd.printManglingSuffix(OS, ImmPredicates, StripImmCodes); + Operands[i].printManglingSuffix(OS, ImmPredicates, StripImmCodes); } } void PrintManglingSuffix(raw_ostream &OS, ImmPredicateSet &ImmPredicates, bool StripImmCodes = false) const { - for (OpKind Opnd : Operands) - Opnd.printManglingSuffix(OS, ImmPredicates, StripImmCodes); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + Operands[i].printManglingSuffix(OS, ImmPredicates, StripImmCodes); } }; } // End anonymous namespace @@ -375,14 +386,14 @@ class FastISelMap { typedef std::multimap PredMap; typedef std::map RetPredMap; typedef std::map TypeRetPredMap; - typedef std::map OpcodeTypeRetPredMap; + typedef std::map OpcodeTypeRetPredMap; typedef std::map OperandsOpcodeTypeRetPredMap; OperandsOpcodeTypeRetPredMap SimplePatterns; // This is used to check that there are no duplicate predicates - std::set> SimplePatternsCheck; @@ -401,16 +412,20 @@ class FastISelMap { private: void emitInstructionCode(raw_ostream &OS, const OperandsSignature &Operands, - const PredMap &PM, StringRef RetVTName); + const PredMap &PM, const std::string &RetVTName); }; } // End anonymous namespace -static std::string getLegalCName(StringRef OpName) { - std::string CName = OpName.str(); - std::string::size_type Pos = CName.find("::"); - if (Pos != std::string::npos) - CName.replace(Pos, 2, "_"); - return CName; +static std::string getOpcodeName(const Record *Op, + const CodeGenDAGPatterns &CGP) { + return CGP.getSDNodeInfo(Op).getEnumName().str(); +} + +static std::string getLegalCName(std::string OpName) { + std::string::size_type pos = OpName.find("::"); + if (pos != std::string::npos) + OpName.replace(pos, 2, "_"); + return OpName; } FastISelMap::FastISelMap(StringRef instns) : InstNS(instns) {} @@ -437,7 +452,10 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) { const CodeGenTarget &Target = CGP.getTargetInfo(); // Scan through all the patterns and record the simple ones. 
- for (const PatternToMatch &Pattern : CGP.ptms()) { + for (CodeGenDAGPatterns::ptm_iterator I = CGP.ptm_begin(), E = CGP.ptm_end(); + I != E; ++I) { + const PatternToMatch &Pattern = *I; + // For now, just look at Instructions, so that we don't have to worry // about emitting multiple instructions for a pattern. TreePatternNode &Dst = Pattern.getDstPattern(); @@ -446,15 +464,15 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) { const Record *Op = Dst.getOperator(); if (!Op->isSubClassOf("Instruction")) continue; - CodeGenInstruction &Inst = CGP.getTargetInfo().getInstruction(Op); - if (Inst.Operands.empty()) + CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op); + if (II.Operands.empty()) continue; // Allow instructions to be marked as unavailable for FastISel for // certain cases, i.e. an ISA has two 'and' instruction which differ // by what registers they can use but are otherwise identical for // codegen purposes. - if (Inst.FastISelShouldIgnore) + if (II.FastISelShouldIgnore) continue; // For now, ignore multi-instruction patterns. 
@@ -475,7 +493,7 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) { const CodeGenRegisterClass *DstRC = nullptr; std::string SubRegNo; if (Op->getName() != "EXTRACT_SUBREG") { - const Record *Op0Rec = Inst.Operands[0].Rec; + const Record *Op0Rec = II.Operands[0].Rec; if (Op0Rec->isSubClassOf("RegisterOperand")) Op0Rec = Op0Rec->getValueAsDef("RegClass"); if (!Op0Rec->isSubClassOf("RegisterClass")) @@ -506,7 +524,7 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) { continue; const Record *InstPatOp = InstPatNode.getOperator(); - StringRef OpcodeName = CGP.getSDNodeInfo(InstPatOp).getEnumName(); + std::string OpcodeName = getOpcodeName(InstPatOp, CGP); MVT::SimpleValueType RetVT = MVT::isVoid; if (InstPatNode.getNumTypes()) RetVT = InstPatNode.getSimpleType(0); @@ -573,7 +591,7 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) { DstRC, std::move(SubRegNo), std::move(PhysRegInputs), PredicateCheck); - int Complexity = Pattern.getPatternComplexity(CGP); + int complexity = Pattern.getPatternComplexity(CGP); auto inserted_simple_pattern = SimplePatternsCheck.insert( {Operands, OpcodeName, VT, RetVT, PredicateCheck}); @@ -584,7 +602,7 @@ void FastISelMap::collectPatterns(const CodeGenDAGPatterns &CGP) { // Note: Instructions with the same complexity will appear in the order // that they are encountered. - SimplePatterns[Operands][OpcodeName][VT][RetVT].emplace(Complexity, + SimplePatterns[Operands][OpcodeName][VT][RetVT].emplace(complexity, std::move(Memo)); // If any of the operands were immediates with predicates on them, strip @@ -613,13 +631,16 @@ void FastISelMap::printImmediatePredicates(raw_ostream &OS) { void FastISelMap::emitInstructionCode(raw_ostream &OS, const OperandsSignature &Operands, - const PredMap &PM, StringRef RetVTName) { + const PredMap &PM, + const std::string &RetVTName) { // Emit code for each possible instruction. There may be // multiple if there are subtarget concerns. 
A reverse iterator // is used to produce the ones with highest complexity first. bool OneHadNoPredicate = false; - for (const auto &[_, Memo] : reverse(PM)) { + for (PredMap::const_reverse_iterator PI = PM.rbegin(), PE = PM.rend(); + PI != PE; ++PI) { + const InstructionMemo &Memo = PI->second; std::string PredicateCheck = Memo.PredicateCheck; if (PredicateCheck.empty()) { @@ -638,11 +659,11 @@ void FastISelMap::emitInstructionCode(raw_ostream &OS, OS << " "; } - for (auto [Idx, PhyReg] : enumerate(Memo.PhysRegs)) { - if (!PhyReg.empty()) + for (unsigned i = 0; i < Memo.PhysRegs.size(); ++i) { + if (Memo.PhysRegs[i] != "") OS << " BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, " - << "TII.get(TargetOpcode::COPY), " << PhyReg << ").addReg(Op" << Idx - << ");\n"; + << "TII.get(TargetOpcode::COPY), " << Memo.PhysRegs[i] + << ").addReg(Op" << i << ");\n"; } OS << " return fastEmitInst_"; @@ -660,8 +681,9 @@ void FastISelMap::emitInstructionCode(raw_ostream &OS, << ");\n"; } - if (!PredicateCheck.empty()) + if (!PredicateCheck.empty()) { OS << " }\n"; + } } // Return Register() if all of the possibilities had predicates but none // were satisfied. @@ -677,38 +699,48 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { const OperandsSignature &Operands = SimplePattern.first; const OpcodeTypeRetPredMap &OTM = SimplePattern.second; - for (const auto &[Opcode, TM] : OTM) { + for (const auto &I : OTM) { + const std::string &Opcode = I.first; + const TypeRetPredMap &TM = I.second; + OS << "// FastEmit functions for " << Opcode << ".\n"; OS << "\n"; // Emit one function for each opcode,type pair. 
- for (const auto &[VT, RM] : TM) { + for (const auto &TI : TM) { + MVT::SimpleValueType VT = TI.first; + const RetPredMap &RM = TI.second; if (RM.size() != 1) { - for (const auto &[RetVT, PM] : RM) { + for (const auto &RI : RM) { + MVT::SimpleValueType RetVT = RI.first; + const PredMap &PM = RI.second; + OS << "Register fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(getEnumName(VT)) << "_" - << getLegalCName(getEnumName(RetVT)) << "_"; + << getLegalCName(getEnumName(VT).str()) << "_" + << getLegalCName(getEnumName(RetVT).str()) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "("; Operands.PrintParameters(OS); OS << ") {\n"; - emitInstructionCode(OS, Operands, PM, getEnumName(RetVT)); + emitInstructionCode(OS, Operands, PM, getEnumName(RetVT).str()); } // Emit one function for the type that demultiplexes on return type. OS << "Register fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(getEnumName(VT)) << "_"; + << getLegalCName(getEnumName(VT).str()) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "(MVT RetVT"; if (!Operands.empty()) OS << ", "; Operands.PrintParameters(OS); OS << ") {\nswitch (RetVT.SimpleTy) {\n"; - for (const auto &[RetVT, _] : RM) { + for (const auto &RI : RM) { + MVT::SimpleValueType RetVT = RI.first; OS << " case " << getEnumName(RetVT) << ": return fastEmit_" - << getLegalCName(Opcode) << "_" << getLegalCName(getEnumName(VT)) - << "_" << getLegalCName(getEnumName(RetVT)) << "_"; + << getLegalCName(Opcode) << "_" + << getLegalCName(getEnumName(VT).str()) << "_" + << getLegalCName(getEnumName(RetVT).str()) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "("; Operands.PrintArguments(OS); @@ -719,7 +751,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { } else { // Non-variadic return type. 
OS << "Register fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(getEnumName(VT)) << "_"; + << getLegalCName(getEnumName(VT).str()) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "(MVT RetVT"; if (!Operands.empty()) @@ -745,8 +777,9 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { Operands.PrintParameters(OS); OS << ") {\n"; OS << " switch (VT.SimpleTy) {\n"; - for (const auto &[VT, _] : TM) { - StringRef TypeName = getEnumName(VT); + for (const auto &TI : TM) { + MVT::SimpleValueType VT = TI.first; + std::string TypeName = getEnumName(VT).str(); OS << " case " << TypeName << ": return fastEmit_" << getLegalCName(Opcode) << "_" << getLegalCName(TypeName) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); @@ -792,15 +825,15 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { // Check each in order it was seen. It would be nice to have a good // relative ordering between them, but we're not going for optimality // here. 
- for (const OperandsSignature &Sig : MI->second) { + for (unsigned i = 0, e = MI->second.size(); i != e; ++i) { OS << " if ("; - Sig.emitImmediatePredicate(OS, ImmediatePredicates); + MI->second[i].emitImmediatePredicate(OS, ImmediatePredicates); OS << ")\n if (Register Reg = fastEmit_"; - Sig.PrintManglingSuffix(OS, ImmediatePredicates); + MI->second[i].PrintManglingSuffix(OS, ImmediatePredicates); OS << "(VT, RetVT, Opcode"; - if (!Sig.empty()) + if (!MI->second[i].empty()) OS << ", "; - Sig.PrintArguments(OS); + MI->second[i].PrintArguments(OS); OS << "))\n return Reg;\n\n"; } @@ -809,7 +842,9 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { } OS << " switch (Opcode) {\n"; - for (const auto &[Opcode, _] : OTM) { + for (const auto &I : OTM) { + const std::string &Opcode = I.first; + OS << " case " << Opcode << ": return fastEmit_" << getLegalCName(Opcode) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp index f93e5fbcc4c27..506995d26d660 100644 --- a/llvm/utils/TableGen/X86RecognizableInstr.cpp +++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp @@ -17,7 +17,6 @@ #include "X86DisassemblerShared.h" #include "X86DisassemblerTables.h" #include "X86ModRMFilters.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Record.h" #include @@ -1021,292 +1020,280 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { #undef MAP } +#define TYPE(Expected, Type) \ + if (Str == Expected) \ + return Type; + OperandType RecognizableInstr::typeFromString(StringRef Str, bool hasREX_W, uint8_t OpSize) { - StringSwitch Switch(Str); if (hasREX_W) { // For instructions with a REX_W prefix, a declared 32-bit register encoding // is special. 
- Switch.Case("GR32", TYPE_R32); + TYPE("GR32", TYPE_R32) } if (OpSize == X86Local::OpSize16) { // For OpSize16 instructions, a declared 16-bit register or // immediate encoding is special. - Switch.Case("GR16", TYPE_Rv); + TYPE("GR16", TYPE_Rv) } else if (OpSize == X86Local::OpSize32) { // For OpSize32 instructions, a declared 32-bit register or // immediate encoding is special. - Switch.Case("GR32", TYPE_Rv); + TYPE("GR32", TYPE_Rv) } - OperandType Type = Switch.Case("i16mem", TYPE_M) - .Case("i16imm", TYPE_IMM) - .Case("i16i8imm", TYPE_IMM) - .Case("GR16", TYPE_R16) - .Case("GR16orGR32orGR64", TYPE_R16) - .Case("i32mem", TYPE_M) - .Case("i32imm", TYPE_IMM) - .Case("i32i8imm", TYPE_IMM) - .Case("GR32", TYPE_R32) - .Case("GR32orGR64", TYPE_R32) - .Case("i64mem", TYPE_M) - .Case("i64i32imm", TYPE_IMM) - .Case("i64i8imm", TYPE_IMM) - .Case("GR64", TYPE_R64) - .Case("i8mem", TYPE_M) - .Case("i8imm", TYPE_IMM) - .Case("u4imm", TYPE_UIMM8) - .Case("u8imm", TYPE_UIMM8) - .Case("i16u8imm", TYPE_UIMM8) - .Case("i32u8imm", TYPE_UIMM8) - .Case("i64u8imm", TYPE_UIMM8) - .Case("GR8", TYPE_R8) - .Case("VR128", TYPE_XMM) - .Case("VR128X", TYPE_XMM) - .Case("f128mem", TYPE_M) - .Case("f256mem", TYPE_M) - .Case("f512mem", TYPE_M) - .Case("FR128", TYPE_XMM) - .Case("FR64", TYPE_XMM) - .Case("FR64X", TYPE_XMM) - .Case("f64mem", TYPE_M) - .Case("sdmem", TYPE_M) - .Case("FR16X", TYPE_XMM) - .Case("FR32", TYPE_XMM) - .Case("FR32X", TYPE_XMM) - .Case("f32mem", TYPE_M) - .Case("f16mem", TYPE_M) - .Case("ssmem", TYPE_M) - .Case("shmem", TYPE_M) - .Case("RST", TYPE_ST) - .Case("RSTi", TYPE_ST) - .Case("i128mem", TYPE_M) - .Case("i256mem", TYPE_M) - .Case("i512mem", TYPE_M) - .Case("i512mem_GR16", TYPE_M) - .Case("i512mem_GR32", TYPE_M) - .Case("i512mem_GR64", TYPE_M) - .Case("i64i32imm_brtarget", TYPE_REL) - .Case("i8imm_brtarget", TYPE_REL) - .Case("i16imm_brtarget", TYPE_REL) - .Case("i32imm_brtarget", TYPE_REL) - .Case("ccode", TYPE_IMM) - .Case("cflags", TYPE_IMM) - .Case("AVX512RC", 
TYPE_IMM) - .Case("brtarget32", TYPE_REL) - .Case("brtarget16", TYPE_REL) - .Case("brtarget8", TYPE_REL) - .Case("f80mem", TYPE_M) - .Case("lea64_8mem", TYPE_M) - .Case("lea64_16mem", TYPE_M) - .Case("lea64_32mem", TYPE_M) - .Case("lea64mem", TYPE_M) - .Case("VR64", TYPE_MM64) - .Case("i64imm", TYPE_IMM) - .Case("anymem", TYPE_M) - .Case("opaquemem", TYPE_M) - .Case("sibmem", TYPE_MSIB) - .Case("SEGMENT_REG", TYPE_SEGMENTREG) - .Case("DEBUG_REG", TYPE_DEBUGREG) - .Case("CONTROL_REG", TYPE_CONTROLREG) - .Case("srcidx8", TYPE_SRCIDX) - .Case("srcidx16", TYPE_SRCIDX) - .Case("srcidx32", TYPE_SRCIDX) - .Case("srcidx64", TYPE_SRCIDX) - .Case("dstidx8", TYPE_DSTIDX) - .Case("dstidx16", TYPE_DSTIDX) - .Case("dstidx32", TYPE_DSTIDX) - .Case("dstidx64", TYPE_DSTIDX) - .Case("offset16_8", TYPE_MOFFS) - .Case("offset16_16", TYPE_MOFFS) - .Case("offset16_32", TYPE_MOFFS) - .Case("offset32_8", TYPE_MOFFS) - .Case("offset32_16", TYPE_MOFFS) - .Case("offset32_32", TYPE_MOFFS) - .Case("offset32_64", TYPE_MOFFS) - .Case("offset64_8", TYPE_MOFFS) - .Case("offset64_16", TYPE_MOFFS) - .Case("offset64_32", TYPE_MOFFS) - .Case("offset64_64", TYPE_MOFFS) - .Case("VR256", TYPE_YMM) - .Case("VR256X", TYPE_YMM) - .Case("VR512", TYPE_ZMM) - .Case("VK1", TYPE_VK) - .Case("VK1WM", TYPE_VK) - .Case("VK2", TYPE_VK) - .Case("VK2WM", TYPE_VK) - .Case("VK4", TYPE_VK) - .Case("VK4WM", TYPE_VK) - .Case("VK8", TYPE_VK) - .Case("VK8WM", TYPE_VK) - .Case("VK16", TYPE_VK) - .Case("VK16WM", TYPE_VK) - .Case("VK32", TYPE_VK) - .Case("VK32WM", TYPE_VK) - .Case("VK64", TYPE_VK) - .Case("VK64WM", TYPE_VK) - .Case("VK1Pair", TYPE_VK_PAIR) - .Case("VK2Pair", TYPE_VK_PAIR) - .Case("VK4Pair", TYPE_VK_PAIR) - .Case("VK8Pair", TYPE_VK_PAIR) - .Case("VK16Pair", TYPE_VK_PAIR) - .Case("vx32mem", TYPE_MVSIBX) - .Case("vx64mem", TYPE_MVSIBX) - .Case("vy32mem", TYPE_MVSIBY) - .Case("vy64mem", TYPE_MVSIBY) - .Case("vx32xmem", TYPE_MVSIBX) - .Case("vx64xmem", TYPE_MVSIBX) - .Case("vy32xmem", TYPE_MVSIBY) - 
.Case("vy64xmem", TYPE_MVSIBY) - .Case("vz32mem", TYPE_MVSIBZ) - .Case("vz64mem", TYPE_MVSIBZ) - .Case("BNDR", TYPE_BNDR) - .Case("TILE", TYPE_TMM) - .Case("TILEPair", TYPE_TMM_PAIR) - .Default(TYPE_NONE); - - if (Type != TYPE_NONE) - return Type; + TYPE("i16mem", TYPE_M) + TYPE("i16imm", TYPE_IMM) + TYPE("i16i8imm", TYPE_IMM) + TYPE("GR16", TYPE_R16) + TYPE("GR16orGR32orGR64", TYPE_R16) + TYPE("i32mem", TYPE_M) + TYPE("i32imm", TYPE_IMM) + TYPE("i32i8imm", TYPE_IMM) + TYPE("GR32", TYPE_R32) + TYPE("GR32orGR64", TYPE_R32) + TYPE("i64mem", TYPE_M) + TYPE("i64i32imm", TYPE_IMM) + TYPE("i64i8imm", TYPE_IMM) + TYPE("GR64", TYPE_R64) + TYPE("i8mem", TYPE_M) + TYPE("i8imm", TYPE_IMM) + TYPE("u4imm", TYPE_UIMM8) + TYPE("u8imm", TYPE_UIMM8) + TYPE("i16u8imm", TYPE_UIMM8) + TYPE("i32u8imm", TYPE_UIMM8) + TYPE("i64u8imm", TYPE_UIMM8) + TYPE("GR8", TYPE_R8) + TYPE("VR128", TYPE_XMM) + TYPE("VR128X", TYPE_XMM) + TYPE("f128mem", TYPE_M) + TYPE("f256mem", TYPE_M) + TYPE("f512mem", TYPE_M) + TYPE("FR128", TYPE_XMM) + TYPE("FR64", TYPE_XMM) + TYPE("FR64X", TYPE_XMM) + TYPE("f64mem", TYPE_M) + TYPE("sdmem", TYPE_M) + TYPE("FR16X", TYPE_XMM) + TYPE("FR32", TYPE_XMM) + TYPE("FR32X", TYPE_XMM) + TYPE("f32mem", TYPE_M) + TYPE("f16mem", TYPE_M) + TYPE("ssmem", TYPE_M) + TYPE("shmem", TYPE_M) + TYPE("RST", TYPE_ST) + TYPE("RSTi", TYPE_ST) + TYPE("i128mem", TYPE_M) + TYPE("i256mem", TYPE_M) + TYPE("i512mem", TYPE_M) + TYPE("i512mem_GR16", TYPE_M) + TYPE("i512mem_GR32", TYPE_M) + TYPE("i512mem_GR64", TYPE_M) + TYPE("i64i32imm_brtarget", TYPE_REL) + TYPE("i8imm_brtarget", TYPE_REL) + TYPE("i16imm_brtarget", TYPE_REL) + TYPE("i32imm_brtarget", TYPE_REL) + TYPE("ccode", TYPE_IMM) + TYPE("cflags", TYPE_IMM) + TYPE("AVX512RC", TYPE_IMM) + TYPE("brtarget32", TYPE_REL) + TYPE("brtarget16", TYPE_REL) + TYPE("brtarget8", TYPE_REL) + TYPE("f80mem", TYPE_M) + TYPE("lea64_8mem", TYPE_M) + TYPE("lea64_16mem", TYPE_M) + TYPE("lea64_32mem", TYPE_M) + TYPE("lea64mem", TYPE_M) + TYPE("VR64", TYPE_MM64) + 
TYPE("i64imm", TYPE_IMM) + TYPE("anymem", TYPE_M) + TYPE("opaquemem", TYPE_M) + TYPE("sibmem", TYPE_MSIB) + TYPE("SEGMENT_REG", TYPE_SEGMENTREG) + TYPE("DEBUG_REG", TYPE_DEBUGREG) + TYPE("CONTROL_REG", TYPE_CONTROLREG) + TYPE("srcidx8", TYPE_SRCIDX) + TYPE("srcidx16", TYPE_SRCIDX) + TYPE("srcidx32", TYPE_SRCIDX) + TYPE("srcidx64", TYPE_SRCIDX) + TYPE("dstidx8", TYPE_DSTIDX) + TYPE("dstidx16", TYPE_DSTIDX) + TYPE("dstidx32", TYPE_DSTIDX) + TYPE("dstidx64", TYPE_DSTIDX) + TYPE("offset16_8", TYPE_MOFFS) + TYPE("offset16_16", TYPE_MOFFS) + TYPE("offset16_32", TYPE_MOFFS) + TYPE("offset32_8", TYPE_MOFFS) + TYPE("offset32_16", TYPE_MOFFS) + TYPE("offset32_32", TYPE_MOFFS) + TYPE("offset32_64", TYPE_MOFFS) + TYPE("offset64_8", TYPE_MOFFS) + TYPE("offset64_16", TYPE_MOFFS) + TYPE("offset64_32", TYPE_MOFFS) + TYPE("offset64_64", TYPE_MOFFS) + TYPE("VR256", TYPE_YMM) + TYPE("VR256X", TYPE_YMM) + TYPE("VR512", TYPE_ZMM) + TYPE("VK1", TYPE_VK) + TYPE("VK1WM", TYPE_VK) + TYPE("VK2", TYPE_VK) + TYPE("VK2WM", TYPE_VK) + TYPE("VK4", TYPE_VK) + TYPE("VK4WM", TYPE_VK) + TYPE("VK8", TYPE_VK) + TYPE("VK8WM", TYPE_VK) + TYPE("VK16", TYPE_VK) + TYPE("VK16WM", TYPE_VK) + TYPE("VK32", TYPE_VK) + TYPE("VK32WM", TYPE_VK) + TYPE("VK64", TYPE_VK) + TYPE("VK64WM", TYPE_VK) + TYPE("VK1Pair", TYPE_VK_PAIR) + TYPE("VK2Pair", TYPE_VK_PAIR) + TYPE("VK4Pair", TYPE_VK_PAIR) + TYPE("VK8Pair", TYPE_VK_PAIR) + TYPE("VK16Pair", TYPE_VK_PAIR) + TYPE("vx32mem", TYPE_MVSIBX) + TYPE("vx64mem", TYPE_MVSIBX) + TYPE("vy32mem", TYPE_MVSIBY) + TYPE("vy64mem", TYPE_MVSIBY) + TYPE("vx32xmem", TYPE_MVSIBX) + TYPE("vx64xmem", TYPE_MVSIBX) + TYPE("vy32xmem", TYPE_MVSIBY) + TYPE("vy64xmem", TYPE_MVSIBY) + TYPE("vz32mem", TYPE_MVSIBZ) + TYPE("vz64mem", TYPE_MVSIBZ) + TYPE("BNDR", TYPE_BNDR) + TYPE("TILE", TYPE_TMM) + TYPE("TILEPair", TYPE_TMM_PAIR) errs() << "Unhandled type string " << Str << "\n"; llvm_unreachable("Unhandled type string"); } +#undef TYPE + +#define ENCODING(Expected, Encoding) \ + if (Str == Expected) 
\ + return Encoding; OperandEncoding RecognizableInstr::immediateEncodingFromString(StringRef Str, uint8_t OpSize) { - StringSwitch Switch(Str); if (OpSize != X86Local::OpSize16) { // For instructions without an OpSize prefix, a declared 16-bit register or // immediate encoding is special. - Switch.Case("i16imm", ENCODING_IW); + ENCODING("i16imm", ENCODING_IW) } - OperandEncoding Encoding = - Switch.Case("i32i8imm", ENCODING_IB) - .Case("AVX512RC", ENCODING_IRC) - .Case("i16imm", ENCODING_Iv) - .Case("i16i8imm", ENCODING_IB) - .Case("i32imm", ENCODING_Iv) - .Case("i64i32imm", ENCODING_ID) - .Case("i64i8imm", ENCODING_IB) - .Case("i8imm", ENCODING_IB) - .Case("ccode", ENCODING_CC) - .Case("cflags", ENCODING_CF) - .Case("u4imm", ENCODING_IB) - .Case("u8imm", ENCODING_IB) - .Case("i16u8imm", ENCODING_IB) - .Case("i32u8imm", ENCODING_IB) - .Case("i64u8imm", ENCODING_IB) - // This is not a typo. Instructions like BLENDVPD put - // register IDs in 8-bit immediates nowadays. - .Case("FR32", ENCODING_IB) - .Case("FR64", ENCODING_IB) - .Case("FR128", ENCODING_IB) - .Case("VR128", ENCODING_IB) - .Case("VR256", ENCODING_IB) - .Case("FR16X", ENCODING_IB) - .Case("FR32X", ENCODING_IB) - .Case("FR64X", ENCODING_IB) - .Case("VR128X", ENCODING_IB) - .Case("VR256X", ENCODING_IB) - .Case("VR512", ENCODING_IB) - .Case("TILE", ENCODING_IB) - .Default(ENCODING_NONE); - - if (Encoding != ENCODING_NONE) - return Encoding; + ENCODING("i32i8imm", ENCODING_IB) + ENCODING("AVX512RC", ENCODING_IRC) + ENCODING("i16imm", ENCODING_Iv) + ENCODING("i16i8imm", ENCODING_IB) + ENCODING("i32imm", ENCODING_Iv) + ENCODING("i64i32imm", ENCODING_ID) + ENCODING("i64i8imm", ENCODING_IB) + ENCODING("i8imm", ENCODING_IB) + ENCODING("ccode", ENCODING_CC) + ENCODING("cflags", ENCODING_CF) + ENCODING("u4imm", ENCODING_IB) + ENCODING("u8imm", ENCODING_IB) + ENCODING("i16u8imm", ENCODING_IB) + ENCODING("i32u8imm", ENCODING_IB) + ENCODING("i64u8imm", ENCODING_IB) + // This is not a typo. 
Instructions like BLENDVPD put + // register IDs in 8-bit immediates nowadays. + ENCODING("FR32", ENCODING_IB) + ENCODING("FR64", ENCODING_IB) + ENCODING("FR128", ENCODING_IB) + ENCODING("VR128", ENCODING_IB) + ENCODING("VR256", ENCODING_IB) + ENCODING("FR16X", ENCODING_IB) + ENCODING("FR32X", ENCODING_IB) + ENCODING("FR64X", ENCODING_IB) + ENCODING("VR128X", ENCODING_IB) + ENCODING("VR256X", ENCODING_IB) + ENCODING("VR512", ENCODING_IB) + ENCODING("TILE", ENCODING_IB) errs() << "Unhandled immediate encoding " << Str << "\n"; llvm_unreachable("Unhandled immediate encoding"); } OperandEncoding RecognizableInstr::rmRegisterEncodingFromString(StringRef Str, uint8_t OpSize) { - auto Encoding = StringSwitch(Str) - .Case("RST", ENCODING_FP) - .Case("RSTi", ENCODING_FP) - .Case("GR16", ENCODING_RM) - .Case("GR16orGR32orGR64", ENCODING_RM) - .Case("GR32", ENCODING_RM) - .Case("GR32orGR64", ENCODING_RM) - .Case("GR64", ENCODING_RM) - .Case("GR8", ENCODING_RM) - .Case("VR128", ENCODING_RM) - .Case("VR128X", ENCODING_RM) - .Case("FR128", ENCODING_RM) - .Case("FR64", ENCODING_RM) - .Case("FR32", ENCODING_RM) - .Case("FR64X", ENCODING_RM) - .Case("FR32X", ENCODING_RM) - .Case("FR16X", ENCODING_RM) - .Case("VR64", ENCODING_RM) - .Case("VR256", ENCODING_RM) - .Case("VR256X", ENCODING_RM) - .Case("VR512", ENCODING_RM) - .Case("VK1", ENCODING_RM) - .Case("VK2", ENCODING_RM) - .Case("VK4", ENCODING_RM) - .Case("VK8", ENCODING_RM) - .Case("VK16", ENCODING_RM) - .Case("VK32", ENCODING_RM) - .Case("VK64", ENCODING_RM) - .Case("BNDR", ENCODING_RM) - .Case("TILE", ENCODING_RM) - .Case("TILEPair", ENCODING_RM) - .Default(ENCODING_NONE); - if (Encoding != ENCODING_NONE) - return Encoding; + ENCODING("RST", ENCODING_FP) + ENCODING("RSTi", ENCODING_FP) + ENCODING("GR16", ENCODING_RM) + ENCODING("GR16orGR32orGR64", ENCODING_RM) + ENCODING("GR32", ENCODING_RM) + ENCODING("GR32orGR64", ENCODING_RM) + ENCODING("GR64", ENCODING_RM) + ENCODING("GR8", ENCODING_RM) + ENCODING("VR128", ENCODING_RM) + 
ENCODING("VR128X", ENCODING_RM) + ENCODING("FR128", ENCODING_RM) + ENCODING("FR64", ENCODING_RM) + ENCODING("FR32", ENCODING_RM) + ENCODING("FR64X", ENCODING_RM) + ENCODING("FR32X", ENCODING_RM) + ENCODING("FR16X", ENCODING_RM) + ENCODING("VR64", ENCODING_RM) + ENCODING("VR256", ENCODING_RM) + ENCODING("VR256X", ENCODING_RM) + ENCODING("VR512", ENCODING_RM) + ENCODING("VK1", ENCODING_RM) + ENCODING("VK2", ENCODING_RM) + ENCODING("VK4", ENCODING_RM) + ENCODING("VK8", ENCODING_RM) + ENCODING("VK16", ENCODING_RM) + ENCODING("VK32", ENCODING_RM) + ENCODING("VK64", ENCODING_RM) + ENCODING("BNDR", ENCODING_RM) + ENCODING("TILE", ENCODING_RM) + ENCODING("TILEPair", ENCODING_RM) errs() << "Unhandled R/M register encoding " << Str << "\n"; llvm_unreachable("Unhandled R/M register encoding"); } OperandEncoding RecognizableInstr::roRegisterEncodingFromString(StringRef Str, uint8_t OpSize) { - auto Encoding = StringSwitch(Str) - .Case("GR16", ENCODING_REG) - .Case("GR16orGR32orGR64", ENCODING_REG) - .Case("GR32", ENCODING_REG) - .Case("GR32orGR64", ENCODING_REG) - .Case("GR64", ENCODING_REG) - .Case("GR8", ENCODING_REG) - .Case("VR128", ENCODING_REG) - .Case("FR128", ENCODING_REG) - .Case("FR64", ENCODING_REG) - .Case("FR32", ENCODING_REG) - .Case("VR64", ENCODING_REG) - .Case("SEGMENT_REG", ENCODING_REG) - .Case("DEBUG_REG", ENCODING_REG) - .Case("CONTROL_REG", ENCODING_REG) - .Case("VR256", ENCODING_REG) - .Case("VR256X", ENCODING_REG) - .Case("VR128X", ENCODING_REG) - .Case("FR64X", ENCODING_REG) - .Case("FR32X", ENCODING_REG) - .Case("FR16X", ENCODING_REG) - .Case("VR512", ENCODING_REG) - .Case("VK1", ENCODING_REG) - .Case("VK2", ENCODING_REG) - .Case("VK4", ENCODING_REG) - .Case("VK8", ENCODING_REG) - .Case("VK16", ENCODING_REG) - .Case("VK32", ENCODING_REG) - .Case("VK64", ENCODING_REG) - .Case("VK1Pair", ENCODING_REG) - .Case("VK2Pair", ENCODING_REG) - .Case("VK4Pair", ENCODING_REG) - .Case("VK8Pair", ENCODING_REG) - .Case("VK16Pair", ENCODING_REG) - .Case("VK1WM", 
ENCODING_REG) - .Case("VK2WM", ENCODING_REG) - .Case("VK4WM", ENCODING_REG) - .Case("VK8WM", ENCODING_REG) - .Case("VK16WM", ENCODING_REG) - .Case("VK32WM", ENCODING_REG) - .Case("VK64WM", ENCODING_REG) - .Case("BNDR", ENCODING_REG) - .Case("TILE", ENCODING_REG) - .Case("TILEPair", ENCODING_REG) - .Default(ENCODING_NONE); - - if (Encoding != ENCODING_NONE) - return Encoding; - + ENCODING("GR16", ENCODING_REG) + ENCODING("GR16orGR32orGR64", ENCODING_REG) + ENCODING("GR32", ENCODING_REG) + ENCODING("GR32orGR64", ENCODING_REG) + ENCODING("GR64", ENCODING_REG) + ENCODING("GR8", ENCODING_REG) + ENCODING("VR128", ENCODING_REG) + ENCODING("FR128", ENCODING_REG) + ENCODING("FR64", ENCODING_REG) + ENCODING("FR32", ENCODING_REG) + ENCODING("VR64", ENCODING_REG) + ENCODING("SEGMENT_REG", ENCODING_REG) + ENCODING("DEBUG_REG", ENCODING_REG) + ENCODING("CONTROL_REG", ENCODING_REG) + ENCODING("VR256", ENCODING_REG) + ENCODING("VR256X", ENCODING_REG) + ENCODING("VR128X", ENCODING_REG) + ENCODING("FR64X", ENCODING_REG) + ENCODING("FR32X", ENCODING_REG) + ENCODING("FR16X", ENCODING_REG) + ENCODING("VR512", ENCODING_REG) + ENCODING("VK1", ENCODING_REG) + ENCODING("VK2", ENCODING_REG) + ENCODING("VK4", ENCODING_REG) + ENCODING("VK8", ENCODING_REG) + ENCODING("VK16", ENCODING_REG) + ENCODING("VK32", ENCODING_REG) + ENCODING("VK64", ENCODING_REG) + ENCODING("VK1Pair", ENCODING_REG) + ENCODING("VK2Pair", ENCODING_REG) + ENCODING("VK4Pair", ENCODING_REG) + ENCODING("VK8Pair", ENCODING_REG) + ENCODING("VK16Pair", ENCODING_REG) + ENCODING("VK1WM", ENCODING_REG) + ENCODING("VK2WM", ENCODING_REG) + ENCODING("VK4WM", ENCODING_REG) + ENCODING("VK8WM", ENCODING_REG) + ENCODING("VK16WM", ENCODING_REG) + ENCODING("VK32WM", ENCODING_REG) + ENCODING("VK64WM", ENCODING_REG) + ENCODING("BNDR", ENCODING_REG) + ENCODING("TILE", ENCODING_REG) + ENCODING("TILEPair", ENCODING_REG) errs() << "Unhandled reg/opcode register encoding " << Str << "\n"; llvm_unreachable("Unhandled reg/opcode register encoding"); 
} @@ -1314,36 +1301,30 @@ RecognizableInstr::roRegisterEncodingFromString(StringRef Str, uint8_t OpSize) { OperandEncoding RecognizableInstr::vvvvRegisterEncodingFromString(StringRef Str, uint8_t OpSize) { - auto Encoding = StringSwitch(Str) - .Case("GR8", ENCODING_VVVV) - .Case("GR16", ENCODING_VVVV) - .Case("GR32", ENCODING_VVVV) - .Case("GR64", ENCODING_VVVV) - .Case("FR32", ENCODING_VVVV) - .Case("FR128", ENCODING_VVVV) - .Case("FR64", ENCODING_VVVV) - .Case("VR128", ENCODING_VVVV) - .Case("VR256", ENCODING_VVVV) - .Case("FR16X", ENCODING_VVVV) - .Case("FR32X", ENCODING_VVVV) - .Case("FR64X", ENCODING_VVVV) - .Case("VR128X", ENCODING_VVVV) - .Case("VR256X", ENCODING_VVVV) - .Case("VR512", ENCODING_VVVV) - .Case("VK1", ENCODING_VVVV) - .Case("VK2", ENCODING_VVVV) - .Case("VK4", ENCODING_VVVV) - .Case("VK8", ENCODING_VVVV) - .Case("VK16", ENCODING_VVVV) - .Case("VK32", ENCODING_VVVV) - .Case("VK64", ENCODING_VVVV) - .Case("TILE", ENCODING_VVVV) - .Case("TILEPair", ENCODING_VVVV) - .Default(ENCODING_NONE); - - if (Encoding != ENCODING_NONE) - return Encoding; - + ENCODING("GR8", ENCODING_VVVV) + ENCODING("GR16", ENCODING_VVVV) + ENCODING("GR32", ENCODING_VVVV) + ENCODING("GR64", ENCODING_VVVV) + ENCODING("FR32", ENCODING_VVVV) + ENCODING("FR128", ENCODING_VVVV) + ENCODING("FR64", ENCODING_VVVV) + ENCODING("VR128", ENCODING_VVVV) + ENCODING("VR256", ENCODING_VVVV) + ENCODING("FR16X", ENCODING_VVVV) + ENCODING("FR32X", ENCODING_VVVV) + ENCODING("FR64X", ENCODING_VVVV) + ENCODING("VR128X", ENCODING_VVVV) + ENCODING("VR256X", ENCODING_VVVV) + ENCODING("VR512", ENCODING_VVVV) + ENCODING("VK1", ENCODING_VVVV) + ENCODING("VK2", ENCODING_VVVV) + ENCODING("VK4", ENCODING_VVVV) + ENCODING("VK8", ENCODING_VVVV) + ENCODING("VK16", ENCODING_VVVV) + ENCODING("VK32", ENCODING_VVVV) + ENCODING("VK64", ENCODING_VVVV) + ENCODING("TILE", ENCODING_VVVV) + ENCODING("TILEPair", ENCODING_VVVV) errs() << "Unhandled VEX.vvvv register encoding " << Str << "\n"; llvm_unreachable("Unhandled 
VEX.vvvv register encoding"); } @@ -1351,125 +1332,105 @@ RecognizableInstr::vvvvRegisterEncodingFromString(StringRef Str, OperandEncoding RecognizableInstr::writemaskRegisterEncodingFromString(StringRef Str, uint8_t OpSize) { - auto Encoding = StringSwitch(Str) - .Case("VK1WM", ENCODING_WRITEMASK) - .Case("VK2WM", ENCODING_WRITEMASK) - .Case("VK4WM", ENCODING_WRITEMASK) - .Case("VK8WM", ENCODING_WRITEMASK) - .Case("VK16WM", ENCODING_WRITEMASK) - .Case("VK32WM", ENCODING_WRITEMASK) - .Case("VK64WM", ENCODING_WRITEMASK) - .Default(ENCODING_NONE); - - if (Encoding != ENCODING_NONE) - return Encoding; - + ENCODING("VK1WM", ENCODING_WRITEMASK) + ENCODING("VK2WM", ENCODING_WRITEMASK) + ENCODING("VK4WM", ENCODING_WRITEMASK) + ENCODING("VK8WM", ENCODING_WRITEMASK) + ENCODING("VK16WM", ENCODING_WRITEMASK) + ENCODING("VK32WM", ENCODING_WRITEMASK) + ENCODING("VK64WM", ENCODING_WRITEMASK) errs() << "Unhandled mask register encoding " << Str << "\n"; llvm_unreachable("Unhandled mask register encoding"); } OperandEncoding RecognizableInstr::memoryEncodingFromString(StringRef Str, uint8_t OpSize) { - auto Encoding = StringSwitch(Str) - .Case("i16mem", ENCODING_RM) - .Case("i32mem", ENCODING_RM) - .Case("i64mem", ENCODING_RM) - .Case("i8mem", ENCODING_RM) - .Case("shmem", ENCODING_RM) - .Case("ssmem", ENCODING_RM) - .Case("sdmem", ENCODING_RM) - .Case("f128mem", ENCODING_RM) - .Case("f256mem", ENCODING_RM) - .Case("f512mem", ENCODING_RM) - .Case("f64mem", ENCODING_RM) - .Case("f32mem", ENCODING_RM) - .Case("f16mem", ENCODING_RM) - .Case("i128mem", ENCODING_RM) - .Case("i256mem", ENCODING_RM) - .Case("i512mem", ENCODING_RM) - .Case("i512mem_GR16", ENCODING_RM) - .Case("i512mem_GR32", ENCODING_RM) - .Case("i512mem_GR64", ENCODING_RM) - .Case("f80mem", ENCODING_RM) - .Case("lea64_8mem", ENCODING_RM) - .Case("lea64_16mem", ENCODING_RM) - .Case("lea64_32mem", ENCODING_RM) - .Case("lea64mem", ENCODING_RM) - .Case("anymem", ENCODING_RM) - .Case("opaquemem", ENCODING_RM) - 
.Case("sibmem", ENCODING_SIB) - .Case("vx32mem", ENCODING_VSIB) - .Case("vx64mem", ENCODING_VSIB) - .Case("vy32mem", ENCODING_VSIB) - .Case("vy64mem", ENCODING_VSIB) - .Case("vx32xmem", ENCODING_VSIB) - .Case("vx64xmem", ENCODING_VSIB) - .Case("vy32xmem", ENCODING_VSIB) - .Case("vy64xmem", ENCODING_VSIB) - .Case("vz32mem", ENCODING_VSIB) - .Case("vz64mem", ENCODING_VSIB) - .Default(ENCODING_NONE); - - if (Encoding != ENCODING_NONE) - return Encoding; - + ENCODING("i16mem", ENCODING_RM) + ENCODING("i32mem", ENCODING_RM) + ENCODING("i64mem", ENCODING_RM) + ENCODING("i8mem", ENCODING_RM) + ENCODING("shmem", ENCODING_RM) + ENCODING("ssmem", ENCODING_RM) + ENCODING("sdmem", ENCODING_RM) + ENCODING("f128mem", ENCODING_RM) + ENCODING("f256mem", ENCODING_RM) + ENCODING("f512mem", ENCODING_RM) + ENCODING("f64mem", ENCODING_RM) + ENCODING("f32mem", ENCODING_RM) + ENCODING("f16mem", ENCODING_RM) + ENCODING("i128mem", ENCODING_RM) + ENCODING("i256mem", ENCODING_RM) + ENCODING("i512mem", ENCODING_RM) + ENCODING("i512mem_GR16", ENCODING_RM) + ENCODING("i512mem_GR32", ENCODING_RM) + ENCODING("i512mem_GR64", ENCODING_RM) + ENCODING("f80mem", ENCODING_RM) + ENCODING("lea64_8mem", ENCODING_RM) + ENCODING("lea64_16mem", ENCODING_RM) + ENCODING("lea64_32mem", ENCODING_RM) + ENCODING("lea64mem", ENCODING_RM) + ENCODING("anymem", ENCODING_RM) + ENCODING("opaquemem", ENCODING_RM) + ENCODING("sibmem", ENCODING_SIB) + ENCODING("vx32mem", ENCODING_VSIB) + ENCODING("vx64mem", ENCODING_VSIB) + ENCODING("vy32mem", ENCODING_VSIB) + ENCODING("vy64mem", ENCODING_VSIB) + ENCODING("vx32xmem", ENCODING_VSIB) + ENCODING("vx64xmem", ENCODING_VSIB) + ENCODING("vy32xmem", ENCODING_VSIB) + ENCODING("vy64xmem", ENCODING_VSIB) + ENCODING("vz32mem", ENCODING_VSIB) + ENCODING("vz64mem", ENCODING_VSIB) errs() << "Unhandled memory encoding " << Str << "\n"; llvm_unreachable("Unhandled memory encoding"); } OperandEncoding RecognizableInstr::relocationEncodingFromString(StringRef Str, uint8_t OpSize) { - 
StringSwitch Switch(Str); - if (OpSize != X86Local::OpSize16) { // For instructions without an OpSize prefix, a declared 16-bit register or // immediate encoding is special. - Switch.Case("i16imm", ENCODING_IW); + ENCODING("i16imm", ENCODING_IW) } - - OperandEncoding Encoding = Switch.Case("i16imm", ENCODING_Iv) - .Case("i16i8imm", ENCODING_IB) - .Case("i32imm", ENCODING_Iv) - .Case("i32i8imm", ENCODING_IB) - .Case("i64i32imm", ENCODING_ID) - .Case("i64i8imm", ENCODING_IB) - .Case("i8imm", ENCODING_IB) - .Case("u8imm", ENCODING_IB) - .Case("i16u8imm", ENCODING_IB) - .Case("i32u8imm", ENCODING_IB) - .Case("i64u8imm", ENCODING_IB) - .Case("i64i32imm_brtarget", ENCODING_ID) - .Case("i16imm_brtarget", ENCODING_IW) - .Case("i32imm_brtarget", ENCODING_ID) - .Case("i8imm_brtarget", ENCODING_IB) - .Case("brtarget32", ENCODING_ID) - .Case("brtarget16", ENCODING_IW) - .Case("brtarget8", ENCODING_IB) - .Case("i64imm", ENCODING_IO) - .Case("offset16_8", ENCODING_Ia) - .Case("offset16_16", ENCODING_Ia) - .Case("offset16_32", ENCODING_Ia) - .Case("offset32_8", ENCODING_Ia) - .Case("offset32_16", ENCODING_Ia) - .Case("offset32_32", ENCODING_Ia) - .Case("offset32_64", ENCODING_Ia) - .Case("offset64_8", ENCODING_Ia) - .Case("offset64_16", ENCODING_Ia) - .Case("offset64_32", ENCODING_Ia) - .Case("offset64_64", ENCODING_Ia) - .Case("srcidx8", ENCODING_SI) - .Case("srcidx16", ENCODING_SI) - .Case("srcidx32", ENCODING_SI) - .Case("srcidx64", ENCODING_SI) - .Case("dstidx8", ENCODING_DI) - .Case("dstidx16", ENCODING_DI) - .Case("dstidx32", ENCODING_DI) - .Case("dstidx64", ENCODING_DI) - .Default(ENCODING_NONE); - - if (Encoding != ENCODING_NONE) - return Encoding; - + ENCODING("i16imm", ENCODING_Iv) + ENCODING("i16i8imm", ENCODING_IB) + ENCODING("i32imm", ENCODING_Iv) + ENCODING("i32i8imm", ENCODING_IB) + ENCODING("i64i32imm", ENCODING_ID) + ENCODING("i64i8imm", ENCODING_IB) + ENCODING("i8imm", ENCODING_IB) + ENCODING("u8imm", ENCODING_IB) + ENCODING("i16u8imm", ENCODING_IB) + 
ENCODING("i32u8imm", ENCODING_IB) + ENCODING("i64u8imm", ENCODING_IB) + ENCODING("i64i32imm_brtarget", ENCODING_ID) + ENCODING("i16imm_brtarget", ENCODING_IW) + ENCODING("i32imm_brtarget", ENCODING_ID) + ENCODING("i8imm_brtarget", ENCODING_IB) + ENCODING("brtarget32", ENCODING_ID) + ENCODING("brtarget16", ENCODING_IW) + ENCODING("brtarget8", ENCODING_IB) + ENCODING("i64imm", ENCODING_IO) + ENCODING("offset16_8", ENCODING_Ia) + ENCODING("offset16_16", ENCODING_Ia) + ENCODING("offset16_32", ENCODING_Ia) + ENCODING("offset32_8", ENCODING_Ia) + ENCODING("offset32_16", ENCODING_Ia) + ENCODING("offset32_32", ENCODING_Ia) + ENCODING("offset32_64", ENCODING_Ia) + ENCODING("offset64_8", ENCODING_Ia) + ENCODING("offset64_16", ENCODING_Ia) + ENCODING("offset64_32", ENCODING_Ia) + ENCODING("offset64_64", ENCODING_Ia) + ENCODING("srcidx8", ENCODING_SI) + ENCODING("srcidx16", ENCODING_SI) + ENCODING("srcidx32", ENCODING_SI) + ENCODING("srcidx64", ENCODING_SI) + ENCODING("dstidx8", ENCODING_DI) + ENCODING("dstidx16", ENCODING_DI) + ENCODING("dstidx32", ENCODING_DI) + ENCODING("dstidx64", ENCODING_DI) errs() << "Unhandled relocation encoding " << Str << "\n"; llvm_unreachable("Unhandled relocation encoding"); } @@ -1477,16 +1438,12 @@ RecognizableInstr::relocationEncodingFromString(StringRef Str, uint8_t OpSize) { OperandEncoding RecognizableInstr::opcodeModifierEncodingFromString(StringRef Str, uint8_t OpSize) { - auto Encoding = StringSwitch(Str) - .Case("GR32", ENCODING_Rv) - .Case("GR64", ENCODING_RO) - .Case("GR16", ENCODING_Rv) - .Case("GR8", ENCODING_RB) - .Case("ccode", ENCODING_CC) - .Default(ENCODING_NONE); - if (Encoding != ENCODING_NONE) - return Encoding; - + ENCODING("GR32", ENCODING_Rv) + ENCODING("GR64", ENCODING_RO) + ENCODING("GR16", ENCODING_Rv) + ENCODING("GR8", ENCODING_RB) + ENCODING("ccode", ENCODING_CC) errs() << "Unhandled opcode modifier encoding " << Str << "\n"; llvm_unreachable("Unhandled opcode modifier encoding"); } +#undef ENCODING diff --git 
a/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/BUILD.gn index 3794e15189e50..95196fc3ebf72 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-doc/BUILD.gn @@ -22,7 +22,6 @@ static_library("clang-doc") { "ClangDoc.cpp", "Generators.cpp", "HTMLGenerator.cpp", - "HTMLMustacheGenerator.cpp", "MDGenerator.cpp", "Mapper.cpp", "Representation.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-doc/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-doc/BUILD.gn index fbb1df4891ead..17ee6c3dee677 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-doc/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/unittests/clang-doc/BUILD.gn @@ -16,7 +16,6 @@ unittest("ClangDocTests") { "//llvm/lib/Bitcode/Reader", "//llvm/lib/Bitcode/Writer", "//llvm/lib/Support", - "//llvm/lib/Testing/Support", ] include_dirs = [ "//clang-tools-extra/clang-doc" ] sources = [ @@ -24,7 +23,6 @@ unittest("ClangDocTests") { "ClangDocTest.cpp", "GeneratorTest.cpp", "HTMLGeneratorTest.cpp", - "HTMLMustacheGeneratorTest.cpp", "MDGeneratorTest.cpp", "MergeTest.cpp", "SerializeTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn index 4230c55da6420..274f5b54345c7 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn @@ -3,7 +3,6 @@ static_library("Passes") { deps = [ "//llvm/lib/Analysis", "//llvm/lib/CodeGen", - "//llvm/lib/CodeGen/GlobalISel", "//llvm/lib/IR", "//llvm/lib/IRPrinter", "//llvm/lib/Support", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn index e0fb8198e7892..56dd2dcd170ea 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn +++ 
b/llvm/utils/gn/secondary/llvm/lib/Transforms/Vectorize/BUILD.gn @@ -8,7 +8,6 @@ static_library("Vectorize") { "//llvm/lib/Transforms/Utils", ] sources = [ - "EVLIndVarSimplify.cpp", "LoadStoreVectorizer.cpp", "LoopIdiomVectorize.cpp", "LoopVectorizationLegality.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn index 9a76fe6a84781..456c4f97c7f25 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/CodeGen/BUILD.gn @@ -28,7 +28,6 @@ unittest("CodeGenTests") { "DIETest.cpp", "DroppedVariableStatsMIRTest.cpp", "DwarfStringPoolEntryRefTest.cpp", - "GCMetadata.cpp", "InstrRefLDVTest.cpp", "LexicalScopesTest.cpp", "LowLevelTypeTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/BUILD.gn index 4c84add8612e8..d561917a974f4 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/BUILD.gn @@ -11,7 +11,6 @@ unittest("VectorizeTests") { sources = [ "VPDomTreeTest.cpp", "VPlanHCFGTest.cpp", - "VPlanPatternMatchTest.cpp", "VPlanSlpTest.cpp", "VPlanTest.cpp", "VPlanVerifierTest.cpp", diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index a8e7dcb54ac20..654aff71f25be 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -236,76 +236,6 @@ foreach index = !range(0, 32) in { def NVVM_EnvReg # index # Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.envreg" # index>; } -//===----------------------------------------------------------------------===// -// Inline PTX op definition -//===----------------------------------------------------------------------===// - -def NVVM_InlinePtxOp : NVVM_Op<"inline_ptx", - [DeclareOpInterfaceMethods, - 
AttrSizedOperandSegments]> -{ - let summary = "Inline PTX Op"; - let description = [{This op allows using PTX directly within the NVVM - dialect, while greatly simplifying llvm.inline_asm generation. It - automatically handles register size selection and sets the correct - read/write access for each operand. The operation leverages the - `BasicPtxBuilderInterface` to abstract away low-level details of - PTX assembly formatting. - - The `predicate` attribute is used to specify a predicate for the - PTX instruction. - - Example 1: Read-only Parameters - ```mlir - nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count) : !llvm.ptr, i32 - - // Lowers to: - llvm.inline_asm has_side_effects asm_dialect = att - "mbarrier.init.b64 [$0], $1;", "l,r" %arg0, %arg2 : (!llvm.ptr, i32) -> () - ``` - - Example 2: Read-only and Write-only Parameters - ```mlir - %0 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input) : f32 -> f32 - - // Lowers to: - %0 = llvm.inline_asm has_side_effects asm_dialect = att - "ex2.approx.ftz.f32 $0, $1;", "=f,f" %arg0 : (f32) -> f32 - ``` - - Example 3: Predicate Usage - ```mlir - nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count), - predicate = %pred : !llvm.ptr, i32, i1 - - // Lowers to: - llvm.inline_asm has_side_effects asm_dialect = att - "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b" %arg0, %arg2, %arg3 - : (!llvm.ptr, i32, i1) -> () - ``` - }]; - - let arguments = (ins Variadic:$readOnlyArgs, - StrAttr:$ptxCode, - PtxPredicate:$predicate); - - let results = (outs Variadic:$writeOnlyArgs); - - let assemblyFormat = [{ - $ptxCode `(` $readOnlyArgs `)` - (`,` `predicate` `=` $predicate^)? attr-dict - `:` type(operands) - (`->` type($writeOnlyArgs)^)? 
- }]; - - let extraClassDefinition = [{ - std::string $cppClass::getPtx() { - StringRef ptxInstStr = getPtxCode(); - return std::string(ptxInstStr.data()); - } - }]; -} - //===----------------------------------------------------------------------===// // NVVM approximate op definitions //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h index 34a94e6ea7051..f1100d5cf8b68 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h @@ -407,22 +407,13 @@ void populateVectorTransposeNarrowTypeRewritePatterns( RewritePatternSet &patterns, PatternBenefit benefit = 1); /// Initialize `typeConverter` and `conversionTarget` for vector linearization. -/// -/// Definition: here 'linearization' means converting a single operation with -/// 1+ vector operand/result of rank>1, into a new single operation whose -/// vector operands and results are all of rank<=1. -/// -/// This function registers (1) which operations are legal, and hence should not -/// be linearized, (2) what the converted types are (rank-1 vectors) and how to +/// This registers (1) which operations are legal and hence should not be +/// linearized, (2) what converted types are (rank-1 vectors) and how to /// materialze the conversion (with shape_cast) /// /// Note: the set of legal operations can be extended by a user if for example -/// certain rank>1 vectors are considered valid, by adding additional +/// certain rank>1 vectors are considered valid, but adding additional /// dynamically legal ops to `conversionTarget`. 
-/// -/// Further note: the choice to use a dialect conversion design for -/// linearization is to make it easy to reuse generic structural type -/// conversions for linearizing scf/cf/func operations void populateForVectorLinearize(TypeConverter &typeConverter, ConversionTarget &conversionTarget); diff --git a/mlir/include/mlir/Dialect/X86Vector/X86Vector.td b/mlir/include/mlir/Dialect/X86Vector/X86Vector.td index 25d9c404f0181..4f8301f9380b8 100644 --- a/mlir/include/mlir/Dialect/X86Vector/X86Vector.td +++ b/mlir/include/mlir/Dialect/X86Vector/X86Vector.td @@ -83,10 +83,7 @@ def MaskCompressOp : AVX512_Op<"mask.compress", [Pure, } }]; let extraClassDeclaration = [{ - SmallVector getIntrinsicOperands( - ::mlir::ArrayRef operands, - const ::mlir::LLVMTypeConverter &typeConverter, - ::mlir::RewriterBase &rewriter); + SmallVector getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&); }]; } @@ -407,10 +404,7 @@ def DotOp : AVX_LowOp<"dot", [Pure, } }]; let extraClassDeclaration = [{ - SmallVector getIntrinsicOperands( - ::mlir::ArrayRef operands, - const ::mlir::LLVMTypeConverter &typeConverter, - ::mlir::RewriterBase &rewriter); + SmallVector getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&); }]; } @@ -458,10 +452,7 @@ def BcstToPackedF32Op : AVX_Op<"bcst_to_f32.packed", [MemoryEffects<[MemRead]>, }]; let extraClassDeclaration = [{ - SmallVector getIntrinsicOperands( - ::mlir::ArrayRef operands, - const ::mlir::LLVMTypeConverter &typeConverter, - ::mlir::RewriterBase &rewriter); + SmallVector getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&); }]; } @@ -509,10 +500,7 @@ def CvtPackedEvenIndexedToF32Op : AVX_Op<"cvt.packed.even.indexed_to_f32", [Memo }]; let extraClassDeclaration = [{ - SmallVector getIntrinsicOperands( - ::mlir::ArrayRef operands, - const ::mlir::LLVMTypeConverter &typeConverter, - ::mlir::RewriterBase &rewriter); + SmallVector getIntrinsicOperands(::mlir::RewriterBase&, const 
LLVMTypeConverter&); }]; } @@ -555,10 +543,7 @@ def CvtPackedOddIndexedToF32Op : AVX_Op<"cvt.packed.odd.indexed_to_f32", [Memory }]; let extraClassDeclaration = [{ - SmallVector getIntrinsicOperands( - ::mlir::ArrayRef operands, - const ::mlir::LLVMTypeConverter &typeConverter, - ::mlir::RewriterBase &rewriter); + SmallVector getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&); }]; } #endif // X86VECTOR_OPS diff --git a/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td b/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td index cde9d1dce65ee..5176f4a447b6e 100644 --- a/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td +++ b/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td @@ -58,11 +58,9 @@ def OneToOneIntrinsicOpInterface : OpInterface<"OneToOneIntrinsicOp"> { }], /*retType=*/"SmallVector", /*methodName=*/"getIntrinsicOperands", - /*args=*/(ins "::mlir::ArrayRef":$operands, - "const ::mlir::LLVMTypeConverter &":$typeConverter, - "::mlir::RewriterBase &":$rewriter), + /*args=*/(ins "::mlir::RewriterBase &":$rewriter, "const LLVMTypeConverter &":$typeConverter), /*methodBody=*/"", - /*defaultImplementation=*/"return SmallVector(operands);" + /*defaultImplementation=*/"return SmallVector($_op->getOperands());" >, ]; } diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h index 8b56d81c8eecc..6efad01dec4cc 100644 --- a/mlir/include/mlir/IR/OpImplementation.h +++ b/mlir/include/mlir/IR/OpImplementation.h @@ -656,12 +656,6 @@ class AsmParser { /// Parse a '+' token if present. virtual ParseResult parseOptionalPlus() = 0; - /// Parse a '/' token. - virtual ParseResult parseSlash() = 0; - - /// Parse a '/' token if present. - virtual ParseResult parseOptionalSlash() = 0; - /// Parse a '-' token. 
virtual ParseResult parseMinus() = 0; diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index 97ae14aa0d6af..30c190e50a4f7 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -142,20 +142,21 @@ class ModuleTranslation { auto result = unresolvedBlockAddressMapping.try_emplace(op, cst); (void)result; assert(result.second && - "attempting to map a blockaddress operation that is already mapped"); + "attempting to map a blockaddress that is already mapped"); } - /// Maps a BlockAddressAttr to its corresponding LLVM basic block. - void mapBlockAddress(BlockAddressAttr attr, llvm::BasicBlock *block) { - auto result = blockAddressToLLVMMapping.try_emplace(attr, block); - (void)result; - assert(result.second && - "attempting to map a blockaddress attribute that is already mapped"); + /// Maps a blockaddress operation to its corresponding placeholder LLVM + /// value. + void mapBlockTag(BlockAddressAttr attr, BlockTagOp blockTag) { + // Attempts to map already mapped block labels which is fine if the given + // labels are verified to be unique. + blockTagMapping[attr] = blockTag; } - /// Finds the LLVM basic block that corresponds to the given BlockAddressAttr. - llvm::BasicBlock *lookupBlockAddress(BlockAddressAttr attr) const { - return blockAddressToLLVMMapping.lookup(attr); + /// Finds an MLIR block that corresponds to the given MLIR call + /// operation. + BlockTagOp lookupBlockTag(BlockAddressAttr attr) const { + return blockTagMapping.lookup(attr); } /// Removes the mapping for blocks contained in the region and values defined @@ -462,9 +463,10 @@ class ModuleTranslation { /// mapping is used to replace the placeholders with the LLVM block addresses. DenseMap unresolvedBlockAddressMapping; - /// Mapping from a BlockAddressAttr attribute to it's matching LLVM basic - /// block. 
- DenseMap blockAddressToLLVMMapping; + /// Mapping from a BlockAddressAttr attribute to a matching BlockTagOp. This + /// is used to cache BlockTagOp locations instead of walking a LLVMFuncOp in + /// search for those. + DenseMap blockTagMapping; /// Stack of user-specified state elements, useful when translating operations /// with regions. diff --git a/mlir/lib/AsmParser/AsmParserImpl.h b/mlir/lib/AsmParser/AsmParserImpl.h index eec2702cba343..1f8fbfdd93568 100644 --- a/mlir/lib/AsmParser/AsmParserImpl.h +++ b/mlir/lib/AsmParser/AsmParserImpl.h @@ -206,16 +206,6 @@ class AsmParserImpl : public BaseT { return success(parser.consumeIf(Token::question)); } - /// Parses a '/' token. - ParseResult parseSlash() override { - return parser.parseToken(Token::slash, "expected '/'"); - } - - /// Parses a '/' if present. - ParseResult parseOptionalSlash() override { - return success(parser.consumeIf(Token::slash)); - } - /// Parses a '*' token. ParseResult parseStar() override { return parser.parseToken(Token::star, "expected '*'"); diff --git a/mlir/lib/AsmParser/Lexer.cpp b/mlir/lib/AsmParser/Lexer.cpp index 751bd63e537f8..b4189181a8495 100644 --- a/mlir/lib/AsmParser/Lexer.cpp +++ b/mlir/lib/AsmParser/Lexer.cpp @@ -157,7 +157,7 @@ Token Lexer::lexToken() { skipComment(); continue; } - return formToken(Token::slash, tokStart); + return emitError(tokStart, "unexpected character"); case '@': return lexAtIdentifier(tokStart); diff --git a/mlir/lib/AsmParser/TokenKinds.def b/mlir/lib/AsmParser/TokenKinds.def index fe7c53753e156..49da8c3dea5fa 100644 --- a/mlir/lib/AsmParser/TokenKinds.def +++ b/mlir/lib/AsmParser/TokenKinds.def @@ -70,7 +70,6 @@ TOK_PUNCTUATION(question, "?") TOK_PUNCTUATION(r_brace, "}") TOK_PUNCTUATION(r_paren, ")") TOK_PUNCTUATION(r_square, "]") -TOK_PUNCTUATION(slash, "/") TOK_PUNCTUATION(star, "*") TOK_PUNCTUATION(vertical_bar, "|") diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp 
b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp index c11f1bca5d49d..62c1857e4b1da 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp @@ -126,10 +126,11 @@ void AffineDataCopyGeneration::runOnBlock(Block *block, // moment; we do a check later and report an error with location info. // Get to the first load, store, or for op (that is not a copy nest itself). - auto curBegin = llvm::find_if(*block, [&](Operation &op) { - return isa(op) && - copyNests.count(&op) == 0; - }); + auto curBegin = + std::find_if(block->begin(), block->end(), [&](Operation &op) { + return isa(op) && + copyNests.count(&op) == 0; + }); // Create [begin, end) ranges. auto it = curBegin; diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index a9370dc003830..fbe7593420102 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -718,54 +718,6 @@ static Operation *replaceForAllWithNewSignature( return newforallOp; } -/// Given two operands coming from a loop iter arg, 'src' and 'dst', return true -/// if the operand 'src' is equal to 'dst' or equal to a iter arg present in a -/// outer loop. To determine the second condition, this function iterates -/// using a worklist over the enclosing loops, trying to find 'src' in any of -/// the parent loop's iter args. -static bool sameOrEquivalentIterArg(Value src, Value dst) { - // Stack like vector containing possible iterArgs candidates. The first one - // is dst, and we will transverse the IR from there. - SmallVector destWorklist; - destWorklist.push_back(dst); - - while (!destWorklist.empty()) { - Value currentDst = destWorklist.pop_back_val(); - - // We have found the same operand in some iter arg in the loop structure, - // so src and dst are equivalent. 
- if (src == currentDst) - return true; - - // The operands are not equivalent, look for enclosing loops over - // currentDst. - auto bbArg = dyn_cast(currentDst); - if (!bbArg) - continue; - - Block *parentBlock = bbArg.getOwner(); - assert(parentBlock && "unlinked block argument"); - - Operation *parentOp = parentBlock->getParentOp(); - assert(parentOp && "expected block argument with parent operation"); - - // Check if parent is loop-like. If it's not, do not add it to the worklist. - auto parentLoop = dyn_cast(parentOp); - if (!parentLoop) - continue; - - for (auto innerIterArg : parentLoop.getRegionIterArgs()) { - // No need to check for null as innerIterArg is tied to parentLoop. - OpOperand *operand = parentLoop.getTiedLoopInit(innerIterArg); - Value loopBlockArgument = - parentLoop->getOperand(operand->getOperandNumber()); - destWorklist.push_back(loopBlockArgument); - } - } - - return false; -} - /// Find the first "extract" user of `producerOp` and tile it right before its /// use. The tiled op is fused under the `containingOp`. /// Return this fused op on success or nullptr if anything fails. @@ -803,40 +755,6 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag, OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPoint(sliceOpToTile); - // Clone the producer inside the consumer and try to update the producer init - // operands using the loop bbArgs if applicable. More precisely, if the bbArg - // of the container loop points to a value that it is used by the consumer op, - // then, instead of using such value on the consumer, use the value coming - // from the bbArg instead. This allows to reuse the output tensor (instead of - // creating a new one) of the container when both producer and container write - // to the same output. 
- if (LoopLikeOpInterface containerLoop = - dyn_cast(sliceOpToTile->getParentOp())) { - Operation *clone = rewriter.clone(*producerOp); - rewriter.modifyOpInPlace(clone, [&]() { - // Iterate over the outputs of the producer and over the loop bbArgs and - // check if any bbArg points to the same value as the producer output. In - // such case, make the producer output point to the bbArg directly. - for (OpOperand &initOperandPtr : - cast(clone).getDpsInitsMutable()) { - Value producerOperand = - clone->getOperand(initOperandPtr.getOperandNumber()); - for (BlockArgument containerIterArg : - containerLoop.getRegionIterArgs()) { - OpOperand *bbArg = containerLoop.getTiedLoopInit(containerIterArg); - Value consumerOperand = - containerLoop->getOperand(bbArg->getOperandNumber()); - // The producer has the same init as the loop bbArg, use it. - if (sameOrEquivalentIterArg(producerOperand, consumerOperand)) { - initOperandPtr.set(containerIterArg); - } - } - } - }); - - tileableProducer = dyn_cast(clone); - } - // Tile the producer. int64_t resultNumber = cast(sliceOpToTile.getSource()).getResultNumber(); @@ -879,10 +797,6 @@ tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag, rewriter, diag, producerOp, containingOp, *tileAndFuseResult, resultNumber, offsets, sizes); - // Cleanup clone. 
- if (dyn_cast(containingOp)) - rewriter.eraseOp(tileableProducer); - return std::make_tuple(tileAndFuseResult->tiledOps, newContainingOp); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp index dd8ef9608a821..26904f1f40d12 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -312,17 +312,10 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, SmallVector inputOperands; SmallVector inputOperandsFromUnpackedSource; SmallVector indexingMaps; - auto hasEquivalentTiles = [](PackOp packOp, UnPackOp unPackOp) { - return packOp.getOuterDimsPerm() == unPackOp.getOuterDimsPerm() && - packOp.getInnerDimsPos() == unPackOp.getInnerDimsPos() && - llvm::equal(packOp.getMixedTiles(), unPackOp.getMixedTiles()); - }; for (OpOperand *inputOperand : genericOp.getDpsInputOperands()) { auto [packedOperand, packedIndexingMap] = getOrCreatePackedViewOfOperand( rewriter, loc, packInfo, genericOp, inputOperand); - auto unpackOp = inputOperand->get().getDefiningOp(); - auto packOp = packedOperand.getDefiningOp(); - if (packOp && unpackOp && hasEquivalentTiles(packOp, unpackOp)) { + if (auto unpackOp = inputOperand->get().getDefiningOp()) { inputOperandsFromUnpackedSource.push_back(unpackOp.getSource()); } else { inputOperandsFromUnpackedSource.push_back(packedOperand); @@ -331,16 +324,14 @@ static GenericOp packGenericOp(RewriterBase &rewriter, GenericOp genericOp, indexingMaps.push_back(packedIndexingMap); } - // If the unpack->pack sequences can be folded, replace use the sources of - // the unpack ops in any unpack->pack chains on the generic op operands. + // If the pack and unpack op can be folded: + // 1) use unpack op source op for operand to fold unpack -> pack sequence. + // 2) init tensor of the generic op can be replaced by the destination of the + // pack op. 
if (isFoldableUnpackPack) { inputOperands = inputOperandsFromUnpackedSource; - if (auto destPack = dest.getDefiningOp()) { - auto destUnPack = destPack.getSource().getDefiningOp(); - if (destUnPack && hasEquivalentTiles(destPack, destUnPack)) { - dest = destUnPack.getSource(); - } - } + if (auto destPack = dest.getDefiningOp()) + dest = destPack.getDest(); } int64_t numInnerLoops = packInfo.getNumTiledLoops(); diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index e8d460020cf69..0cc840403a020 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -511,7 +511,7 @@ struct LinalgOpPartialReductionInterface for (auto [resultNum, dimExpr] : llvm::enumerate(partialMap.getResults())) { unsigned dim = cast(dimExpr).getPosition(); - if (llvm::is_contained(reductionDims, dim)) { + if (llvm::find(reductionDims, dim) != reductionDims.end()) { partialReductionDims.push_back(resultNum); } } @@ -553,7 +553,7 @@ struct LinalgOpPartialReductionInterface unsigned dim = cast(dimExpr).getPosition(); resultSizes.push_back(sizes[dim]); - if (llvm::is_contained(reductionDims, dim)) { + if (llvm::find(reductionDims, dim) != reductionDims.end()) { // Reduction dims are reduced, and are always outputed in the same // place. So use offset 0 for them. 
resultOffsets.push_back(b.getIndexAttr(0)); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index c5b62227777a7..63f88d02ff3a0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1506,69 +1506,72 @@ static SmallVector getTiledPackShape(linalg::PackOp packOp, return applyPermutation(destShape, linalg::getPackInverseDestPerm(packOp)); } -/// Creates an optionally masked TransferWriteOp +/// Creates a TransferWriteOp to write `input` into a newly initialized +/// output tensor. /// -/// Generates the following operation: -/// %res = vector.transfer_write %vectorToStore into %dest +/// Given: +/// - an input vector to write, +/// - the mixed destination sizes for the output tensor, +/// - and the vector sizes used for vectorization (i.e., the leading N dims, +/// for some value of N), +/// +/// this function generates the following sequence of ops: +/// +/// %dest = tensor.empty(%destSizes) +/// %res = vector.transfer_write %input into %dest /// /// If the leading N dimensions of the destination tensor do not match -/// `inputVecSizesForLeadingDims` (N = rank(inputVecSizesForLeadingDims)), -/// masking is applied to ensure correctness: +/// `inputVecSizesForLeadingDims` (where N = +/// rank(`inputVecSizesForLeadingDims`)), masking is applied to ensure +/// correctness: /// -/// %mask = vector.create_mask(%destShape) -/// %res = vector.mask %mask { -/// vector.transfer_write %vectorToStore into %dest -/// } +/// %dest = tensor.empty(%destSizes) +/// %write = vector.transfer_write %input into %dest +/// %mask = vector.create_mask(%destSizes) +/// %res = vector.mask %mask { %write } /// /// If `useInBoundsInsteadOfMasking` is set to `true`, the `in_bounds` attribute /// is used instead of masking: /// -/// %write = vector.transfer_write %vectorToStore into %dest +/// %dest = tensor.empty(%destSizes) /// in_bounds_flags = 
(...) /// %res = vector.transfer_write %input into %dest /// {in_bounds = in_bounds_flags} /// -/// NOTE: All write offsets are set to 0. -/// TODO: Allow specyfying write offsets. +/// NOTE: all write offsets are set to 0. /// NOTE: When N < rank(input), the missing vector sizes are effectively /// extracted from the trailing sizes of `destSizes`. This means those sizes -/// must be static. -/// TODO: Support cases where an arbitrary dim is dynamic - this will require -/// specifying all the vector sizes. +/// must be static. Supporting dynamic sizes will require the user to specify +/// the remaining vector sizes. This is left as a TODO. static Operation * -createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vectorToStore, - Value dest, +createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value input, + SmallVector destSizes, ArrayRef inputVecSizesForLeadingDims, bool useInBoundsInsteadOfMasking = false) { - ShapedType destType = cast(dest.getType()); - assert(cast(vectorToStore.getType()).getRank() == - static_cast(destType.getRank()) && + auto inputType = cast(input.getType()); + assert(inputType.getRank() == static_cast(destSizes.size()) && "Rank mismatch!"); - (void)destType; + Value dest = builder.create(loc, destSizes, + inputType.getElementType()); int64_t rank = cast(dest.getType()).getRank(); + auto zero = builder.create(loc, 0); auto destShape = cast(dest.getType()).getShape(); - - // Compute the in_bounds attribute SmallVector inBoundsVal(rank, true); if (useInBoundsInsteadOfMasking) { // In this case, assume that all the required vector sizes have been // provided. - assert(inputVecSizesForLeadingDims.size() == - static_cast(destType.getRank()) && + assert(inputVecSizesForLeadingDims.size() == destSizes.size() && "Insufficient number of input vector sizes!"); // Update the inBounds attribute. 
for (unsigned i = 0; i < rank; i++) inBoundsVal[i] = (destShape[i] == inputVecSizesForLeadingDims[i]) && !ShapedType::isDynamic(destShape[i]); } - - // Generate the xfer_write Op - auto zero = builder.create(loc, 0); Operation *write = builder.create( loc, - /*vector=*/vectorToStore, + /*vector=*/input, /*source=*/dest, /*indices=*/SmallVector(rank, zero), /*inBounds=*/inBoundsVal); @@ -1576,17 +1579,11 @@ createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vectorToStore, destShape.drop_front(inputVecSizesForLeadingDims.size()), [](int64_t size) { return size == ShapedType::kDynamic; }) && "Only dims aligned with inputVecSizesForLeadingDims may be dynamic"); - - // If masking is disabled, exit. if (useInBoundsInsteadOfMasking) return write; - - // Check if masking is needed. bool needMaskForWrite = !llvm::equal(inputVecSizesForLeadingDims, destShape.take_front(inputVecSizesForLeadingDims.size())); - - // If masking is needed, generate the mask and mask the operation. if (needMaskForWrite) { SmallVector writeMaskShape; writeMaskShape.append(inputVecSizesForLeadingDims.begin(), @@ -1595,11 +1592,10 @@ createWriteOrMaskedWrite(OpBuilder &builder, Location loc, Value vectorToStore, inputVecSizesForLeadingDims.size(), destShape.end()); auto writeMaskType = VectorType::get(writeMaskShape, builder.getI1Type()); - Value maskForWrite = builder.create( - loc, writeMaskType, tensor::getMixedSizes(builder, loc, dest)); + Value maskForWrite = + builder.create(loc, writeMaskType, destSizes); write = mlir::vector::maskOperation(builder, write, maskForWrite); } - return write; } @@ -1697,11 +1693,9 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp, loc, shapeCastOp.getResult(), destPermutation); // Create TransferWriteOp. 
- Value dest = rewriter.create( - loc, reifiedReturnShapes[0], - transposeOp.getResult().getType().getElementType()); Operation *write = - createWriteOrMaskedWrite(rewriter, loc, transposeOp.getResult(), dest, + createWriteOrMaskedWrite(rewriter, loc, transposeOp.getResult(), + /*destSizes=*/reifiedReturnShapes[0], /*inputVecSizesForLeadingDims=*/inputVectorSizes, /*useInBoundsInsteadOfMasking=*/false); newResults.push_back(write->getResult(0)); @@ -1836,13 +1830,10 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp, unpackOp.getDestType().hasStaticShape() ? vectorSizes : shapeCastOp.getResultVectorType().getShape()); - Value dest = rewriter.create( - loc, reifiedRetShapes[0], - shapeCastOp.getResult().getType().getElementType()); - Operation *write = - createWriteOrMaskedWrite(rewriter, loc, shapeCastOp.getResult(), dest, - /*inputVecSizesForLeadingDims=*/writeVectorSizes, - useInBoundsInsteadOfMasking); + Operation *write = createWriteOrMaskedWrite( + rewriter, loc, shapeCastOp.getResult(), /*destSizes=*/reifiedRetShapes[0], + /*inputVecSizesForLeadingDims=*/writeVectorSizes, + useInBoundsInsteadOfMasking); newResults.push_back(write->getResult(0)); return success(); } @@ -1870,14 +1861,10 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp, auto maskedRead = vector::createReadOrMaskedRead( rewriter, loc, padOp.getSource(), inputVectorSizes, padValue, /*useInBoundsInsteadOfMasking=*/false); - - // Create Xfer write Op - Value dest = rewriter.create( - loc, reifiedReturnShapes[0], padOp.getResultType().getElementType()); - Operation *write = - createWriteOrMaskedWrite(rewriter, loc, maskedRead, dest, - /*inputVecSizesForLeadingDims=*/inputVectorSizes, - /*useInBoundsInsteadOfMasking=*/false); + Operation *write = createWriteOrMaskedWrite( + rewriter, loc, maskedRead, reifiedReturnShapes[0], + /*inputVecSizesForLeadingDims=*/inputVectorSizes, + /*useInBoundsInsteadOfMasking=*/false); 
newResults.push_back(write->getResult(0)); return success(); } diff --git a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp index 2bdb58892937f..1a1334f0ea474 100644 --- a/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp +++ b/mlir/lib/Dialect/Mesh/IR/MeshOps.cpp @@ -1505,7 +1505,7 @@ LogicalResult ShiftOp::verifySymbolUses(SymbolTableCollection &symbolTable) { auto meshAxes = getMeshAxes(); auto shiftAxis = getShiftAxis().getZExtValue(); - if (!llvm::is_contained(meshAxes, shiftAxis)) { + if (llvm::find(meshAxes, shiftAxis) == meshAxes.end()) { return emitError() << "Invalid shift axis " << shiftAxis << ". It must be one of the grouping mesh axes."; } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index deff86d5c5ecb..2bf7aaa46db11 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1521,9 +1521,6 @@ static ParseResult parseMapClause(OpAsmParser &parser, IntegerAttr &mapType) { if (mapTypeMod == "delete") mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE; - if (mapTypeMod == "return_param") - mapTypeBits |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; - return success(); }; @@ -1586,12 +1583,6 @@ static void printMapClause(OpAsmPrinter &p, Operation *op, emitAllocRelease = false; mapTypeStrs.push_back("delete"); } - if (mapTypeToBitFlag( - mapTypeBits, - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM)) { - emitAllocRelease = false; - mapTypeStrs.push_back("return_param"); - } if (emitAllocRelease) mapTypeStrs.push_back("exit_release_or_enter_alloc"); @@ -1786,17 +1777,6 @@ static LogicalResult verifyPrivateVarsMapping(TargetOp targetOp) { // MapInfoOp //===----------------------------------------------------------------------===// -static LogicalResult verifyMapInfoDefinedArgs(Operation *op, - StringRef clauseName, - OperandRange vars) { - for (Value var : vars) - if 
(!llvm::isa_and_present(var.getDefiningOp())) - return op->emitOpError() - << "'" << clauseName - << "' arguments must be defined by 'omp.map.info' ops"; - return success(); -} - LogicalResult MapInfoOp::verify() { if (getMapperId() && !SymbolTable::lookupNearestSymbolFrom( @@ -1804,9 +1784,6 @@ LogicalResult MapInfoOp::verify() { return emitError("invalid mapper id"); } - if (failed(verifyMapInfoDefinedArgs(*this, "members", getMembers()))) - return failure(); - return success(); } @@ -1828,15 +1805,6 @@ LogicalResult TargetDataOp::verify() { "At least one of map, use_device_ptr_vars, or " "use_device_addr_vars operand must be present"); } - - if (failed(verifyMapInfoDefinedArgs(*this, "use_device_ptr", - getUseDevicePtrVars()))) - return failure(); - - if (failed(verifyMapInfoDefinedArgs(*this, "use_device_addr", - getUseDeviceAddrVars()))) - return failure(); - return verifyMapClause(*this, getMapVars()); } @@ -1921,15 +1889,16 @@ void TargetOp::build(OpBuilder &builder, OperationState &state, } LogicalResult TargetOp::verify() { - if (failed(verifyDependVarList(*this, getDependKinds(), getDependVars()))) - return failure(); + LogicalResult verifyDependVars = + verifyDependVarList(*this, getDependKinds(), getDependVars()); - if (failed(verifyMapInfoDefinedArgs(*this, "has_device_addr", - getHasDeviceAddrVars()))) - return failure(); + if (failed(verifyDependVars)) + return verifyDependVars; - if (failed(verifyMapClause(*this, getMapVars()))) - return failure(); + LogicalResult verifyMapVars = verifyMapClause(*this, getMapVars()); + + if (failed(verifyMapVars)) + return verifyMapVars; return verifyPrivateVarsMapping(*this); } diff --git a/mlir/lib/Dialect/Quant/IR/QuantOps.cpp b/mlir/lib/Dialect/Quant/IR/QuantOps.cpp index e23a0d6aba825..94e1c8b8ba296 100644 --- a/mlir/lib/Dialect/Quant/IR/QuantOps.cpp +++ b/mlir/lib/Dialect/Quant/IR/QuantOps.cpp @@ -122,7 +122,7 @@ LogicalResult verifySubChannelQuantization( // // Therefore, we explicitly disallow the case where 
d = 0 to maintain // consistency and avoid these issues. - if (llvm::is_contained(tensorType.getShape(), 0)) { + if (llvm::find(tensorType.getShape(), 0) != tensorType.getShape().end()) { return op->emitError() << "tensor dimension size of zero is not allowed " "with sub-channel quantization"; } diff --git a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp index 09326242eec2a..40d2e254fb7dd 100644 --- a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp @@ -99,7 +99,7 @@ class ConvertForOpTypes // PR47938 tracks this issue, but it seems hard to fix. Instead, we need // to clone the op. // - // 2. We need to reuse the original region instead of cloning it, otherwise + // 2. We need to reuse the original region instead of cloning it, otherwise // the dialect conversion framework thinks that we just inserted all the // cloned child ops. But what we want is to "take" the child regions and let // the dialect conversion framework continue recursively into ops inside diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 2196199816292..fcbef0c14739f 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -791,7 +791,7 @@ LogicalResult SparseTensorEncodingAttr::verify( return emitError() << "unexpected coordinate bitwidth: " << crdWidth; // Verify every COO segment. - auto *it = llvm::find_if(lvlTypes, isSingletonLT); + auto *it = std::find_if(lvlTypes.begin(), lvlTypes.end(), isSingletonLT); while (it != lvlTypes.end()) { if (it == lvlTypes.begin() || !(it - 1)->isa()) @@ -829,7 +829,7 @@ LogicalResult SparseTensorEncodingAttr::verify( } // TODO: audit formats that actually are supported by backend.
- if (auto it = llvm::find_if(lvlTypes, isNOutOfMLT); + if (auto it = std::find_if(lvlTypes.begin(), lvlTypes.end(), isNOutOfMLT); it != std::end(lvlTypes)) { if (it != lvlTypes.end() - 1) return emitError() << "expected n_out_of_m to be the last level type"; diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 79bf87ccd34af..f6c3c6a61afb6 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -5573,11 +5573,13 @@ LogicalResult ShapeCastOp::verify() { return success(); } +namespace { + /// Return true if `transpose` does not permute a pair of non-unit dims. /// By `order preserving` we mean that the flattened versions of the input and /// output vectors are (numerically) identical. In other words `transpose` is /// effectively a shape cast. -static bool isOrderPreserving(TransposeOp transpose) { +bool isOrderPreserving(TransposeOp transpose) { ArrayRef permutation = transpose.getPermutation(); VectorType sourceType = transpose.getSourceVectorType(); ArrayRef inShape = sourceType.getShape(); @@ -5597,6 +5599,8 @@ static bool isOrderPreserving(TransposeOp transpose) { return true; } +} // namespace + OpFoldResult ShapeCastOp::fold(FoldAdaptor adaptor) { VectorType resultType = getType(); @@ -5993,22 +5997,18 @@ OpFoldResult vector::TransposeOp::fold(FoldAdaptor adaptor) { if (llvm::dyn_cast_if_present(adaptor.getVector())) return ub::PoisonAttr::get(getContext()); - // Eliminate identity transposes, and more generally any transposes that - // preserves the shape without permuting elements. 
- // - // Examples of what to fold: - // %0 = vector.transpose %arg, [0, 1] : vector<1x1xi8> to vector<1x1xi8> - // %0 = vector.transpose %arg, [0, 1] : vector<2x2xi8> to vector<2x2xi8> - // %0 = vector.transpose %arg, [1, 0] : vector<1x1xi8> to vector<1x1xi8> - // - // Example of what NOT to fold: - // %0 = vector.transpose %arg, [1, 0] : vector<2x2xi8> to vector<2x2xi8> - // - if (getSourceVectorType() == getResultVectorType() && - isOrderPreserving(*this)) - return getVector(); + // Eliminate identity transpose ops. This happens when the dimensions of the + // input vector remain in their original order after the transpose operation. + ArrayRef perm = getPermutation(); - return {}; + // Check if the permutation of the dimensions contains sequential values: + // {0, 1, 2, ...}. + for (int64_t i = 0, e = perm.size(); i < e; i++) { + if (perm[i] != i) + return {}; + } + + return getVector(); } LogicalResult vector::TransposeOp::verify() { diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp index 060ce7d1d6643..b9cef003fa365 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp @@ -109,110 +109,17 @@ struct LinearizeVectorizable final } }; -template -static bool stridesAllOne(TOp op) { - static_assert( - std::is_same_v || - std::is_same_v, - "expected vector.extract_strided_slice or vector.insert_strided_slice"); - ArrayAttr strides = op.getStrides(); - return llvm::all_of( - strides, [](auto stride) { return isConstantIntValue(stride, 1); }); -} - -/// Convert an array of attributes into a vector of integers, if possible. 
-static FailureOr> intsFromArrayAttr(ArrayAttr attrs) { - if (!attrs) - return failure(); - SmallVector ints; - ints.reserve(attrs.size()); - for (auto attr : attrs) { - if (auto intAttr = dyn_cast(attr)) { - ints.push_back(intAttr.getInt()); - } else { - return failure(); - } - } - return ints; -} - -/// Consider inserting a vector of shape `small` into a vector of shape `large`, -/// at position `offsets`: this function enumeratates all the indices in `large` -/// that are written to. The enumeration is with row-major ordering. -/// -/// Example: insert a 1x2 vector into a 4x5 vector at position (1,3). The 2 -/// positions written to are (1,3) and (1,4), which have linearized indices 8 -/// and 9. So [8,9] is returned. -/// -/// The length of the returned vector is equal to the number of elements in -/// the shape `small` (i.e. the product of dimensions of `small`). -SmallVector static getStridedSliceInsertionIndices( - ArrayRef small, ArrayRef large, - ArrayRef offsets) { - - // Example of alignment between, `large`, `small` and `offsets`: - // large = 4, 5, 6, 7, 8 - // small = 1, 6, 7, 8 - // offsets = 2, 3, 0 - // - // `offsets` has implicit trailing 0s, `small` has implicit leading 1s. - assert((large.size() >= small.size()) && - "rank of 'large' cannot be lower than rank of 'small'"); - assert((large.size() >= offsets.size()) && - "rank of 'large' cannot be lower than the number of offsets"); - unsigned delta = large.size() - small.size(); - unsigned nOffsets = offsets.size(); - auto getSmall = [&](int64_t i) -> int64_t { - return i >= delta ? small[i - delta] : 1; - }; - auto getOffset = [&](int64_t i) -> int64_t { - return i < nOffsets ? offsets[i] : 0; - }; - - // Using 2 vectors of indices, at each iteration populate the updated set of - // indices based on the old set of indices, and the size of the small vector - // in the current iteration. 
- SmallVector indices{0}; - int64_t stride = 1; - for (int i = large.size() - 1; i >= 0; --i) { - int64_t currentSize = indices.size(); - int64_t smallSize = getSmall(i); - int64_t nextSize = currentSize * smallSize; - SmallVector nextIndices(nextSize); - int64_t *base = nextIndices.begin(); - int64_t offset = getOffset(i) * stride; - for (int j = 0; j < smallSize; ++j) { - for (int k = 0; k < currentSize; ++k) { - base[k] = indices[k] + offset; - } - offset += stride; - base += currentSize; - } - stride *= large[i]; - indices = std::move(nextIndices); - } - return indices; -} - -/// This pattern converts a vector.extract_strided_slice operation into a -/// vector.shuffle operation that has a rank-1 (linearized) operand and result. -/// -/// For example, the following: -/// -/// ``` +/// This pattern converts the ExtractStridedSliceOp into a ShuffleOp that works +/// on a linearized vector. +/// Following, /// vector.extract_strided_slice %source /// { offsets = [..], strides = [..], sizes = [..] } -/// ``` -/// /// is converted to : -/// ``` /// %source_1d = vector.shape_cast %source -/// %out_1d = vector.shuffle %source_1d, %source_1d [ shuffle_indices_1d ] -/// %out_nd = vector.shape_cast %out_1d -/// ``` -/// -/// `shuffle_indices_1d` is computed using the offsets and sizes of the original -/// vector.extract_strided_slice operation. +/// %out_1d = vector.shuffle %source_1d, %source_1d [ shuffle_indices_1d ] +/// %out_nd = vector.shape_cast %out_1d +/// `shuffle_indices_1d` is computed using the offsets and sizes of the +/// extraction. 
struct LinearizeVectorExtractStridedSlice final : public mlir::OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -222,116 +129,88 @@ struct LinearizeVectorExtractStridedSlice final : OpConversionPattern(typeConverter, context, benefit) {} LogicalResult - matchAndRewrite(vector::ExtractStridedSliceOp extractStridedSliceOp, - OpAdaptor adaptor, + matchAndRewrite(vector::ExtractStridedSliceOp extractOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { + VectorType dstType = + getTypeConverter()->convertType(extractOp.getType()); + assert(dstType && "vector type destination expected."); + if (extractOp.getVector().getType().isScalable() || dstType.isScalable()) + return rewriter.notifyMatchFailure(extractOp, + "scalable vectors are not supported."); - VectorType flatOutputType = getTypeConverter()->convertType( - extractStridedSliceOp.getType()); - assert(flatOutputType && "vector type expected"); - - // Expect a legalization failure if the strides are not all 1 (if ever the - // verifier for extract_strided_slice allows non-1 strides). - if (!stridesAllOne(extractStridedSliceOp)) { + ArrayAttr offsets = extractOp.getOffsets(); + ArrayAttr sizes = extractOp.getSizes(); + ArrayAttr strides = extractOp.getStrides(); + if (!isConstantIntValue(strides[0], 1)) return rewriter.notifyMatchFailure( - extractStridedSliceOp, - "extract_strided_slice with strides != 1 not supported"); + extractOp, "Strided slice with stride != 1 is not supported."); + Value srcVector = adaptor.getVector(); + // If kD offsets are specified for nD source vector (n > k), the granularity + // of the extraction is greater than 1. In this case last (n-k) dimensions + // form the extraction granularity. + // Example : + // vector.extract_strided_slice %src { + // offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : + // vector<4x8x8xf32> to vector<2x2x8xf32> + // Here, extraction granularity is 8. 
+ int64_t extractGranularitySize = 1; + int64_t nD = extractOp.getSourceVectorType().getRank(); + int64_t kD = (int64_t)offsets.size(); + int64_t k = kD; + while (k < nD) { + extractGranularitySize *= extractOp.getSourceVectorType().getShape()[k]; + ++k; } - - FailureOr> offsets = - intsFromArrayAttr(extractStridedSliceOp.getOffsets()); - if (failed(offsets)) { - return rewriter.notifyMatchFailure(extractStridedSliceOp, - "failed to get integer offsets"); + // Get total number of extracted slices. + int64_t nExtractedSlices = 1; + for (Attribute size : sizes) { + nExtractedSlices *= cast(size).getInt(); } - - ArrayRef inputShape = - extractStridedSliceOp.getSourceVectorType().getShape(); - - ArrayRef outputShape = extractStridedSliceOp.getType().getShape(); - - SmallVector indices = getStridedSliceInsertionIndices( - outputShape, inputShape, offsets.value()); - - Value srcVector = adaptor.getVector(); - rewriter.replaceOpWithNewOp( - extractStridedSliceOp, flatOutputType, srcVector, srcVector, indices); - return success(); - } -}; - -/// This pattern converts a vector.insert_strided_slice operation into a -/// vector.shuffle operation that has rank-1 (linearized) operands and result. -/// -/// For example, the following: -/// ``` -/// %0 = vector.insert_strided_slice %to_store, %into -/// {offsets = [1, 0, 0, 0], strides = [1, 1]} -/// : vector<2x2xi8> into vector<2x1x3x2xi8> -/// ``` -/// -/// is converted to -/// ``` -/// %to_store_1d -/// = vector.shape_cast %to_store : vector<2x2xi8> to vector<4xi8> -/// %into_1d = vector.shape_cast %into : vector<2x1x3x2xi8> to vector<12xi8> -/// %out_1d = vector.shuffle %into_1d, %to_store_1d [ shuffle_indices_1d ] -/// %out_nd = vector.shape_cast %out_1d : vector<12xi8> to vector<2x1x3x2xi8> -/// ``` -/// -/// where shuffle_indices_1d in this case is -/// [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 10, 11]. 
-/// ^^^^^^^^^^^^^^ -/// to_store_1d -/// -struct LinearizeVectorInsertStridedSlice final - : public mlir::OpConversionPattern { - using OpConversionPattern::OpConversionPattern; - LinearizeVectorInsertStridedSlice(const TypeConverter &typeConverter, - MLIRContext *context, - PatternBenefit benefit = 1) - : OpConversionPattern(typeConverter, context, benefit) {} - - LogicalResult - matchAndRewrite(vector::InsertStridedSliceOp insertStridedSliceOp, - OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - - // Expect a legalization failure if the strides are not all 1 (if ever the - // verifier for insert_strided_slice allows non-1 strides). - if (!stridesAllOne(insertStridedSliceOp)) { - return rewriter.notifyMatchFailure( - insertStridedSliceOp, - "insert_strided_slice with strides != 1 not supported"); + // Compute the strides of the source vector considering first k dimensions. + llvm::SmallVector sourceStrides(kD, extractGranularitySize); + for (int i = kD - 2; i >= 0; --i) { + sourceStrides[i] = sourceStrides[i + 1] * + extractOp.getSourceVectorType().getShape()[i + 1]; } - - VectorType inputType = insertStridedSliceOp.getValueToStore().getType(); - ArrayRef inputShape = inputType.getShape(); - - VectorType outputType = insertStridedSliceOp.getType(); - ArrayRef outputShape = outputType.getShape(); - int64_t nOutputElements = outputType.getNumElements(); - - FailureOr> offsets = - intsFromArrayAttr(insertStridedSliceOp.getOffsets()); - if (failed(offsets)) { - return rewriter.notifyMatchFailure(insertStridedSliceOp, - "failed to get integer offsets"); + // Final shuffle indices has nExtractedSlices * extractGranularitySize + // elements. + llvm::SmallVector indices(nExtractedSlices * + extractGranularitySize); + // Compute the strides of the extracted kD vector. + llvm::SmallVector extractedStrides(kD, 1); + // Compute extractedStrides. 
+ for (int i = kD - 2; i >= 0; --i) { + extractedStrides[i] = + extractedStrides[i + 1] * cast(sizes[i + 1]).getInt(); } - SmallVector sliceIndices = getStridedSliceInsertionIndices( - inputShape, outputShape, offsets.value()); - - SmallVector indices(nOutputElements); - std::iota(indices.begin(), indices.end(), 0); - for (auto [index, sliceIndex] : llvm::enumerate(sliceIndices)) { - indices[sliceIndex] = index + nOutputElements; + // Iterate over all extracted slices from 0 to nExtractedSlices - 1 + // and compute the multi-dimensional index and the corresponding linearized + // index within the source vector. + for (int64_t i = 0; i < nExtractedSlices; ++i) { + int64_t index = i; + // Compute the corresponding multi-dimensional index. + llvm::SmallVector multiDimIndex(kD, 0); + for (int64_t j = 0; j < kD; ++j) { + multiDimIndex[j] = (index / extractedStrides[j]); + index -= multiDimIndex[j] * extractedStrides[j]; + } + // Compute the corresponding linearized index in the source vector + // i.e. shift the multiDimIndex by the offsets. + int64_t linearizedIndex = 0; + for (int64_t j = 0; j < kD; ++j) { + linearizedIndex += + (cast(offsets[j]).getInt() + multiDimIndex[j]) * + sourceStrides[j]; + } + // Fill the indices array from linearizedIndex to linearizedIndex + + // extractGranularitySize. + for (int64_t j = 0; j < extractGranularitySize; ++j) { + indices[i * extractGranularitySize + j] = linearizedIndex + j; + } } - - Value flatToStore = adaptor.getValueToStore(); - Value flatDest = adaptor.getDest(); - rewriter.replaceOpWithNewOp(insertStridedSliceOp, - flatDest.getType(), flatDest, - flatToStore, indices); + // Perform a shuffle to extract the kD vector.
+ rewriter.replaceOpWithNewOp( + extractOp, dstType, srcVector, srcVector, indices); return success(); } }; @@ -417,7 +296,7 @@ struct LinearizeVectorExtract final // Skip if result is not a vector type if (!isa(extractOp.getType())) return rewriter.notifyMatchFailure(extractOp, - "scalar extract not supported"); + "scalar extract is not supported."); Type dstTy = getTypeConverter()->convertType(extractOp.getType()); assert(dstTy && "expected 1-D vector type"); @@ -566,109 +445,47 @@ struct LinearizeVectorSplat final } }; -/// This pattern converts the CreateMaskOp to work on a linearized vector. -/// It currently supports only 2D masks with a unit outer dimension. -/// Following, -/// vector.create_mask %arg0, %arg1 : vector<1x4xi1> -/// is converted to: -/// %zero = arith.constant 0 : index -/// %cmpi = arith.cmpi sgt, %arg0, %zero : index -/// %index = arith.index_cast %cmpi : i1 to index -/// %mul = arith.andi %index, %arg1 : index -/// %mask = vector.create_mask %mul : vector<4xi1> -/// %shape_cast = vector.shape_cast %mask : vector<4xi1> to vector<1x4xi1> -struct LinearizeVectorCreateMask final - : OpConversionPattern { - using OpConversionPattern::OpConversionPattern; +} // namespace - LinearizeVectorCreateMask(const TypeConverter &typeConverter, - MLIRContext *context, PatternBenefit benefit = 1) - : OpConversionPattern(typeConverter, context, benefit) {} +/// Return true if the operation `op` does not support scalable vectors and +/// has at least 1 scalable vector result. These ops should all eventually +/// support scalable vectors, and this function should be removed. 
+static bool isNotLinearizableBecauseScalable(Operation *op) { - LogicalResult - matchAndRewrite(vector::CreateMaskOp createMaskOp, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - Location loc = createMaskOp.getLoc(); - VectorType srcTy = createMaskOp.getType(); - auto srcShape = srcTy.getShape(); - if (srcShape.size() != 2) - return rewriter.notifyMatchFailure(createMaskOp, - "only 2D mask is supported."); - - if (srcShape[0] != 1) - return rewriter.notifyMatchFailure( - createMaskOp, "only unit outer dimension is supported."); + bool unsupported = + isa( + op); + if (!unsupported) + return false; - auto dstTy = getTypeConverter()->convertType(srcTy); - if (!dstTy) - return rewriter.notifyMatchFailure(createMaskOp, "cannot convert type."); - - // Compare the first operand with 0. If it is greater than 0, the - // corresponding mask element is set to true, otherwise false. - // The result of the comparison is then multiplied with - // the second operand of create_mask to get the 1D mask. - auto firstOperand = adaptor.getOperands().front(); - auto zero = rewriter.create(loc, 0); - auto isNonZero = rewriter.createOrFold( - loc, mlir::arith::CmpIPredicate::sgt, firstOperand, zero); - auto isNonZeroIndex = rewriter.createOrFold( - loc, rewriter.getIndexType(), isNonZero); - auto secondOperand = adaptor.getOperands().back(); - auto maskSize = rewriter.createOrFold( - loc, rewriter.getIndexType(), isNonZeroIndex, secondOperand); - - auto newMask = - rewriter.create(loc, dstTy, maskSize); - rewriter.replaceOp(createMaskOp, newMask); - return success(); - } -}; + // Check if any of the results is a scalable vector type. 
+ auto types = op->getResultTypes(); + bool containsScalableResult = + std::any_of(types.begin(), types.end(), [](Type type) { + auto vecType = dyn_cast(type); + return vecType && vecType.isScalable(); + }); -} // namespace + return containsScalableResult; +} -/// This method defines the set of operations that are linearizable, and hence -/// that are considered illegal for the conversion target. -static bool isLinearizable(Operation *op) { +static bool isNotLinearizable(Operation *op) { // Only ops that are in the vector dialect, are ConstantLike, or // are Vectorizable might be linearized currently. StringLiteral vectorDialect = vector::VectorDialect::getDialectNamespace(); StringRef opDialect = op->getDialect()->getNamespace(); - bool supported = (opDialect == vectorDialect) || - op->hasTrait() || - op->hasTrait(); - if (!supported) - return false; + bool unsupported = (opDialect != vectorDialect) && + !op->hasTrait() && + !op->hasTrait(); + if (unsupported) + return true; + + // Some ops currently don't support scalable vectors. + if (isNotLinearizableBecauseScalable(op)) + return true; - return TypeSwitch(op) - // As type legalization is done with vector.shape_cast, shape_cast - // itself cannot be linearized (will create new shape_casts to linearize - // ad infinitum). - .Case([&](auto) { return false; }) - // The operations - // - vector.extract_strided_slice - // - vector.extract - // - vector.insert_strided_slice - // - vector.insert - // are linearized to a rank-1 vector.shuffle by the current patterns. - // vector.shuffle only supports fixed size vectors, so it is impossible to - // use this approach to linearize these ops if they operate on scalable - // vectors. 
- .Case( - [&](vector::ExtractStridedSliceOp extractOp) { - return !extractOp.getType().isScalable(); - }) - .Case( - [&](vector::InsertStridedSliceOp insertOp) { - return !insertOp.getType().isScalable(); - }) - .Case([&](vector::InsertOp insertOp) { - return !insertOp.getType().isScalable(); - }) - .Case([&](vector::ExtractOp extractOp) { - return !extractOp.getSourceVectorType().isScalable(); - }) - .Default([&](auto) { return true; }); + return false; } void mlir::vector::populateForVectorLinearize(TypeConverter &typeConverter, @@ -702,7 +519,7 @@ void mlir::vector::populateForVectorLinearize(TypeConverter &typeConverter, target.markUnknownOpDynamicallyLegal( [=](Operation *op) -> std::optional { - if (!isLinearizable(op)) + if (isNotLinearizable(op)) return true; // This will return true if, for all operand and result types `t`, // convertType(t) = t. This is true if there are no rank>=2 vectors. @@ -713,17 +530,15 @@ void mlir::vector::populateForVectorLinearize(TypeConverter &typeConverter, void mlir::vector::populateVectorLinearizeBasePatterns( const TypeConverter &typeConverter, const ConversionTarget &target, RewritePatternSet &patterns) { - patterns - .add( - typeConverter, patterns.getContext()); + patterns.add( + typeConverter, patterns.getContext()); } void mlir::vector::populateVectorLinearizeShuffleLikeOpsPatterns( const TypeConverter &typeConverter, const ConversionTarget &target, RewritePatternSet &patterns) { patterns.add(typeConverter, - patterns.getContext()); + LinearizeVectorInsert, LinearizeVectorExtractStridedSlice>( + typeConverter, patterns.getContext()); } diff --git a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp index cc7ab7f3f3895..8d383b1f8103b 100644 --- a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp +++ b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp @@ -31,11 +31,24 @@ void x86vector::X86VectorDialect::initialize() { >(); } -static Value getMemrefBuffPtr(Location 
loc, MemRefType type, Value buffer, - const LLVMTypeConverter &typeConverter, - RewriterBase &rewriter) { - MemRefDescriptor memRefDescriptor(buffer); - return memRefDescriptor.bufferPtr(rewriter, loc, typeConverter, type); +static SmallVector +getMemrefBuffPtr(Location loc, ::mlir::TypedValue<::mlir::MemRefType> memrefVal, + RewriterBase &rewriter, + const LLVMTypeConverter &typeConverter) { + SmallVector operands; + auto opType = memrefVal.getType(); + + Type llvmStructType = typeConverter.convertType(opType); + Value llvmStruct = + rewriter + .create(loc, llvmStructType, memrefVal) + .getResult(0); + MemRefDescriptor memRefDescriptor(llvmStruct); + + Value ptr = memRefDescriptor.bufferPtr(rewriter, loc, typeConverter, opType); + operands.push_back(ptr); + + return operands; } LogicalResult x86vector::MaskCompressOp::verify() { @@ -53,61 +66,48 @@ LogicalResult x86vector::MaskCompressOp::verify() { } SmallVector x86vector::MaskCompressOp::getIntrinsicOperands( - ArrayRef operands, const LLVMTypeConverter &typeConverter, - RewriterBase &rewriter) { + RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) { auto loc = getLoc(); - Adaptor adaptor(operands, *this); - auto opType = adaptor.getA().getType(); + auto opType = getA().getType(); Value src; - if (adaptor.getSrc()) { - src = adaptor.getSrc(); - } else if (adaptor.getConstantSrc()) { - src = rewriter.create(loc, opType, - adaptor.getConstantSrcAttr()); + if (getSrc()) { + src = getSrc(); + } else if (getConstantSrc()) { + src = rewriter.create(loc, opType, getConstantSrcAttr()); } else { auto zeroAttr = rewriter.getZeroAttr(opType); src = rewriter.create(loc, opType, zeroAttr); } - return SmallVector{adaptor.getA(), src, adaptor.getK()}; + return SmallVector{getA(), src, getK()}; } SmallVector -x86vector::DotOp::getIntrinsicOperands(ArrayRef operands, - const LLVMTypeConverter &typeConverter, - RewriterBase &rewriter) { - SmallVector intrinsicOperands(operands); 
+x86vector::DotOp::getIntrinsicOperands(RewriterBase &rewriter, + const LLVMTypeConverter &typeConverter) { + SmallVector operands(getOperands()); // Dot product of all elements, broadcasted to all elements. Value scale = rewriter.create(getLoc(), rewriter.getI8Type(), 0xff); - intrinsicOperands.push_back(scale); + operands.push_back(scale); - return intrinsicOperands; + return operands; } SmallVector x86vector::BcstToPackedF32Op::getIntrinsicOperands( - ArrayRef operands, const LLVMTypeConverter &typeConverter, - RewriterBase &rewriter) { - Adaptor adaptor(operands, *this); - return {getMemrefBuffPtr(getLoc(), getA().getType(), adaptor.getA(), - typeConverter, rewriter)}; + RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) { + return getMemrefBuffPtr(getLoc(), getA(), rewriter, typeConverter); } SmallVector x86vector::CvtPackedEvenIndexedToF32Op::getIntrinsicOperands( - ArrayRef operands, const LLVMTypeConverter &typeConverter, - RewriterBase &rewriter) { - Adaptor adaptor(operands, *this); - return {getMemrefBuffPtr(getLoc(), getA().getType(), adaptor.getA(), - typeConverter, rewriter)}; + RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) { + return getMemrefBuffPtr(getLoc(), getA(), rewriter, typeConverter); } SmallVector x86vector::CvtPackedOddIndexedToF32Op::getIntrinsicOperands( - ArrayRef operands, const LLVMTypeConverter &typeConverter, - RewriterBase &rewriter) { - Adaptor adaptor(operands, *this); - return {getMemrefBuffPtr(getLoc(), getA().getType(), adaptor.getA(), - typeConverter, rewriter)}; + RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) { + return getMemrefBuffPtr(getLoc(), getA(), rewriter, typeConverter); } #define GET_OP_CLASSES diff --git a/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp index 483c1f5c3e4c6..9ee44a63ba2e4 100644 --- a/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp +++ 
b/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp @@ -84,23 +84,20 @@ LogicalResult intrinsicRewrite(Operation *op, StringAttr intrinsic, /// Generic one-to-one conversion of simply mappable operations into calls /// to their respective LLVM intrinsics. struct OneToOneIntrinsicOpConversion - : public OpInterfaceConversionPattern { - using OpInterfaceConversionPattern< - x86vector::OneToOneIntrinsicOp>::OpInterfaceConversionPattern; + : public OpInterfaceRewritePattern { + using OpInterfaceRewritePattern< + x86vector::OneToOneIntrinsicOp>::OpInterfaceRewritePattern; OneToOneIntrinsicOpConversion(const LLVMTypeConverter &typeConverter, PatternBenefit benefit = 1) - : OpInterfaceConversionPattern(typeConverter, &typeConverter.getContext(), - benefit), + : OpInterfaceRewritePattern(&typeConverter.getContext(), benefit), typeConverter(typeConverter) {} - LogicalResult - matchAndRewrite(x86vector::OneToOneIntrinsicOp op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - return intrinsicRewrite( - op, rewriter.getStringAttr(op.getIntrinsicName()), - op.getIntrinsicOperands(operands, typeConverter, rewriter), - typeConverter, rewriter); + LogicalResult matchAndRewrite(x86vector::OneToOneIntrinsicOp op, + PatternRewriter &rewriter) const override { + return intrinsicRewrite(op, rewriter.getStringAttr(op.getIntrinsicName()), + op.getIntrinsicOperands(rewriter, typeConverter), + typeConverter, rewriter); } private: diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp index 2107df37d1997..cf462ddf6f17c 100644 --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -222,14 +222,9 @@ static Error compileAndExecuteVoidFunction( CompileAndExecuteConfig config, std::unique_ptr tm) { auto mainFunction = dyn_cast_or_null( SymbolTable::lookupSymbolIn(module, entryPoint)); - if (!mainFunction || mainFunction.isExternal()) + if (!mainFunction || mainFunction.empty()) 
return makeStringError("entry point not found"); - if (cast(mainFunction.getFunctionType()) - .getNumParams() != 0) - return makeStringError( - "JIT can't invoke a main function expecting arguments"); - auto resultType = dyn_cast( mainFunction.getFunctionType().getReturnType()); if (!resultType) @@ -279,8 +274,7 @@ Error compileAndExecuteSingleReturnFunction( if (cast(mainFunction.getFunctionType()) .getNumParams() != 0) - return makeStringError( - "JIT can't invoke a main function expecting arguments"); + return makeStringError("function inputs not supported"); if (Error error = checkCompatibleReturnType(mainFunction)) return error; diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index 9470b54c9f3aa..4ea313019f34d 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -690,13 +690,19 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, // Emit blockaddress. We first need to find the LLVM block referenced by this // operation and then create a LLVM block address for it. if (auto blockAddressOp = dyn_cast(opInst)) { + // getBlockTagOp() walks a function to search for block labels. Check + // whether it's in cache first. 
BlockAddressAttr blockAddressAttr = blockAddressOp.getBlockAddr(); - llvm::BasicBlock *llvmBlock = - moduleTranslation.lookupBlockAddress(blockAddressAttr); + BlockTagOp blockTagOp = moduleTranslation.lookupBlockTag(blockAddressAttr); + if (!blockTagOp) { + blockTagOp = blockAddressOp.getBlockTagOp(); + moduleTranslation.mapBlockTag(blockAddressAttr, blockTagOp); + } llvm::Value *llvmValue = nullptr; StringRef fnName = blockAddressAttr.getFunction().getValue(); - if (llvmBlock) { + if (llvm::BasicBlock *llvmBlock = + moduleTranslation.lookupBlock(blockTagOp->getBlock())) { llvm::Function *llvmFn = moduleTranslation.lookupFunction(fnName); llvmValue = llvm::BlockAddress::get(llvmFn, llvmBlock); } else { @@ -730,8 +736,7 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, FlatSymbolRefAttr::get(&moduleTranslation.getContext(), funcOp.getName()), blockTagOp.getTag()); - moduleTranslation.mapBlockAddress(blockAddressAttr, - builder.GetInsertBlock()); + moduleTranslation.mapBlockTag(blockAddressAttr, blockTagOp); return success(); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 010c46358f7df..9f7b5605556e6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3720,9 +3720,6 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder &ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) { - assert(!ompBuilder.Config.isTargetDevice() && - "function only supported for host device codegen"); - // Map the first segment of our structure combinedInfo.Types.emplace_back( isTargetParams @@ -3831,8 +3828,6 @@ static void processMapMembersWithParent( llvm::OpenMPIRBuilder 
&ompBuilder, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, llvm::omp::OpenMPOffloadMappingFlags memberOfFlag) { - assert(!ompBuilder.Config.isTargetDevice() && - "function only supported for host device codegen"); auto parentClause = llvm::cast(mapData.MapClause[mapDataIndex]); @@ -3946,9 +3941,6 @@ static void processMapWithMembersOf(LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, uint64_t mapDataIndex, bool isTargetParams) { - assert(!ompBuilder.Config.isTargetDevice() && - "function only supported for host device codegen"); - auto parentClause = llvm::cast(mapData.MapClause[mapDataIndex]); @@ -3990,8 +3982,6 @@ static void createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) { - assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() && - "function only supported for host device codegen"); for (size_t i = 0; i < mapData.MapClause.size(); ++i) { // if it's declare target, skip it, it's handled separately. if (!mapData.IsDeclareTarget[i]) { @@ -4056,9 +4046,6 @@ static void genMapInfos(llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, DataLayout &dl, MapInfosTy &combinedInfo, MapInfoData &mapData, bool isTargetParams = false) { - assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() && - "function only supported for host device codegen"); - // We wish to modify some of the methods in which arguments are // passed based on their capture type by the target region, this can // involve generating new loads and stores, which changes the @@ -4070,7 +4057,8 @@ static void genMapInfos(llvm::IRBuilderBase &builder, // kernel arg structure. It primarily becomes relevant in cases like // bycopy, or byref range'd arrays. In the default case, we simply // pass thee pointer byref as both basePointer and pointer. 
- createAlteredByCaptureMap(mapData, moduleTranslation, builder); + if (!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice()) + createAlteredByCaptureMap(mapData, moduleTranslation, builder); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); @@ -4104,8 +4092,6 @@ emitUserDefinedMapper(Operation *declMapperOp, llvm::IRBuilderBase &builder, static llvm::Expected getOrCreateUserDefinedMapperFunc(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { - assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() && - "function only supported for host device codegen"); auto declMapperOp = cast(op); std::string mapperFuncName = moduleTranslation.getOpenMPBuilder()->createPlatformSpecificName( @@ -4122,8 +4108,6 @@ static llvm::Expected emitUserDefinedMapper(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, llvm::StringRef mapperFuncName) { - assert(!moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() && - "function only supported for host device codegen"); auto declMapperOp = cast(op); auto declMapperInfoOp = declMapperOp.getDeclareMapperInfo(); DataLayout dl = DataLayout(declMapperOp->getParentOfType()); @@ -4613,8 +4597,6 @@ static void handleDeclareTargetMapVar(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder, llvm::Function *func) { - assert(moduleTranslation.getOpenMPBuilder()->Config.isTargetDevice() && - "function only supported for target device codegen"); for (size_t i = 0; i < mapData.MapClause.size(); ++i) { // In the case of declare target mapped variables, the basePointer is // the reference pointer generated by the convertDeclareTargetAttr @@ -4707,8 +4689,6 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase::InsertPoint allocaIP, llvm::IRBuilderBase::InsertPoint codeGenIP) { - 
assert(ompBuilder.Config.isTargetDevice() && - "function only supported for target device codegen"); builder.restoreIP(allocaIP); omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef; diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 95b8ee0331c55..1168b9f339904 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1843,13 +1843,17 @@ LogicalResult ModuleTranslation::convertComdats() { LogicalResult ModuleTranslation::convertUnresolvedBlockAddress() { for (auto &[blockAddressOp, llvmCst] : unresolvedBlockAddressMapping) { BlockAddressAttr blockAddressAttr = blockAddressOp.getBlockAddr(); - llvm::BasicBlock *llvmBlock = lookupBlockAddress(blockAddressAttr); + BlockTagOp blockTagOp = lookupBlockTag(blockAddressAttr); + assert(blockTagOp && "expected all block tags to be already seen"); + + llvm::BasicBlock *llvmBlock = lookupBlock(blockTagOp->getBlock()); assert(llvmBlock && "expected LLVM blocks to be already translated"); // Update mapping with new block address constant. 
auto *llvmBlockAddr = llvm::BlockAddress::get( lookupFunction(blockAddressAttr.getFunction().getValue()), llvmBlock); llvmCst->replaceAllUsesWith(llvmBlockAddr); + mapValue(blockAddressOp.getResult(), llvmBlockAddr); assert(llvmCst->use_empty() && "expected all uses to be replaced"); cast(llvmCst)->eraseFromParent(); } diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 8d720ce62a91b..c7a6eca158276 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -680,28 +680,3 @@ llvm.func @llvm_nvvm_barrier_arrive(%barID : i32, %numberOfThreads : i32) { nvvm.barrier.arrive id = %barID number_of_threads = %numberOfThreads llvm.return } - - -// ----- - -llvm.func @init_mbarrier( - %barrier_gen : !llvm.ptr, - %barrier : !llvm.ptr<3>, - %count : i32, - %pred : i1) { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.init.b64 [$0], $1;", "l,r" - nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count) : !llvm.ptr, i32 - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b" - nvvm.inline_ptx "mbarrier.init.b64 [$0], $1;" (%barrier_gen, %count), predicate = %pred : !llvm.ptr, i32, i1 - llvm.return -} -// ----- - -llvm.func @ex2(%input : f32, %pred : i1) { - // CHECK: %{{.*}} = llvm.inline_asm has_side_effects asm_dialect = att "ex2.approx.ftz.f32 $0, $1;", "=f,f" %{{.*}} : (f32) -> f32 - %0 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input) : f32 -> f32 - - // CHECK: %{{.*}} = llvm.inline_asm has_side_effects asm_dialect = att "@$1 ex2.approx.ftz.f32 $0, $1;", "=f,f,b" %{{.*}}, %{{.*}} : (f32, i1) -> f32 - %1 = nvvm.inline_ptx "ex2.approx.ftz.f32 $0, $1;" (%input), predicate = %pred : f32, i1 -> f32 - llvm.return -} diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir index 
31c9e9ed3c501..63f068d3f8681 100644 --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -455,9 +455,10 @@ func.func @unpack_on_output(%arg0: tensor<12x2x56x56x32xf32>) -> tensor<12x56x56 // CHECK: %[[UNPACKED_ARG0:.+]] = linalg.unpack %[[ARG0]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]]] -// CHECK-SAME: outs(%[[ARG0]] +// CHECK-SAME: outs(%[[EMPTY]] // CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[UNPACKED_ARG0]] @@ -481,14 +482,11 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56 // CHECK-LABEL: func.func @unpack_on_input // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> -// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf32> // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK-SAME: outs(%[[EMPTY]] // CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1]] @@ -512,14 +510,11 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: t // CHECK-LABEL: func.func @unpack_element_type_change // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: 
%[[ARG1_PACK_EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> -// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]] -// CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] -// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] +// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<12x2x56x56x32xf16> // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]]] // CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK-SAME: outs(%[[EMPTY]] // CHECK: %[[UNPACK:.+]] = linalg.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG1]] @@ -1402,13 +1397,10 @@ func.func @push_unpack_in_padded_domain_foldable(%arg0: tensor<8x8x4x8xf32>, %de // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]] -// CHECK: %[[ARG2_PACK_EMPTY:.+]] = tensor.empty -// CHECK: %[[ARG2_PACK:.+]] = linalg.pack %[[ARG2]] -// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 8] -// CHECK-SAME: into %[[ARG2_PACK_EMPTY]] +// CHECK: %[[EMPTY:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor<8x8x4x8xf32>) -// CHECK-SAME: outs(%[[ARG2_PACK]] : tensor) +// CHECK-SAME: outs(%[[EMPTY]] : tensor) // CHECK: %[[UNPACK:.+]] = linalg.unpack %[[GENERIC]] // CHECK-SAME: into %[[ARG2]] // CHECK: return %[[UNPACK]] : tensor @@ -1427,13 +1419,10 @@ func.func @push_unpack_in_padded_domain_out_used(%arg0: tensor<8x8x4x8xf32>, %ar // CHECK-LABEL: func.func @push_unpack_in_padded_domain_out_used // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] -// CHECK: %[[ARG1_PACK_EMPTY:.+]] = tensor.empty -// CHECK: %[[ARG1_PACK:.+]] = linalg.pack %[[ARG1]] -// CHECK-SAME: inner_dims_pos = [0, 1] inner_tiles = [4, 8] -// CHECK-SAME: into %[[ARG1_PACK_EMPTY]] +// CHECK: %[[EMPTY:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: 
ins(%[[ARG0]] : tensor<8x8x4x8xf32>) -// CHECK-SAME: outs(%[[ARG1_PACK]] : tensor) +// CHECK-SAME: outs(%[[EMPTY]] : tensor) // CHECK: %[[UNPACK2:.+]] = linalg.unpack %[[GENERIC]] // CHECK-SAME: into %[[ARG1]] // CHECK: return %[[UNPACK2]] : tensor diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir index 572a2ae70e0a4..4115f2857a20c 100644 --- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir @@ -206,106 +206,6 @@ module { #map1 = affine_map<(d0)[s0] -> (d0 * s0)> #map2 = affine_map<(d0)[s0, s1] -> (-(d0 * s1) + s0, s1)> -module { - // CHECK-LABEL: func.func @fuse_tileable_op_through_bbarg_inout - // CHECK-SAME: %[[CHUNK_SIZE:[0-9a-z]+]]: index - // CHECK-SAME: %[[INOUT:[0-9a-z]+]]: tensor - func.func @fuse_tileable_op_through_bbarg_inout(%arg0: index, %arg1: tensor) -> tensor { - %cst = arith.constant 4.200000e+01 : f32 - %c0 = arith.constant 0 : index - %0 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor) -> tensor - %d0 = tensor.dim %arg1, %c0 : tensor - %1 = affine.apply #map0()[%d0, %arg0] - - // CHECK: scf.forall {{.*}} shared_outs(%[[BBARGOUT:.*]] = %[[INOUT]]) -> (tensor) { - %2 = scf.forall (%arg3) in (%1) shared_outs(%o = %arg1) -> (tensor) { - %3 = affine.apply #map1(%arg3)[%arg0] - %4 = affine.min #map2(%arg3)[%d0, %arg0] - %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor - - // CHECK: %[[T0:.*]] = tensor.extract_slice %[[BBARGOUT]][%{{.*}}] [%{{.*}}] [{{.*}}] - // CHECK: %[[T1:.*]] = tensor.extract_slice %[[BBARGOUT]][%{{.*}}] [%{{.*}}] [{{.*}}] - // CHECK: %[[T2:.*]] = linalg.fill {{.*}} outs(%[[T1]] - %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor to tensor - - // CHECK: %[[T3:.*]] = linalg.elemwise_unary ins(%[[T2]] : tensor) outs(%[[T0]] : tensor) - %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor - scf.forall.in_parallel { - 
tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor - } - } - // CHECK: } - func.return %2 : tensor - } - - module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.match ops{["scf.forall"]} in %arg1 : (!transform.any_op) -> !transform.any_op - - // linalg.fill is tileable. The op is tiled and fused. - transform.structured.fuse_into_containing_op %0 into %1 - : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) - transform.yield - } - } -} - -// ----- - -module { - // CHECK-LABEL: func.func @fuse_tileable_op_through_bbarg_inout_nested - // CHECK-SAME: %[[ARG0:[0-9a-z]+]]: tensor - // CHECK-SAME: %[[ARG1:[0-9a-z]+]]: tensor - func.func @fuse_tileable_op_through_bbarg_inout_nested(%arg0: tensor, %arg1: tensor) -> tensor { - %c2 = arith.constant 2 : index - %c1 = arith.constant 1 : index - %c0 = arith.constant 0 : index - %0 = linalg.elemwise_unary {fun = #linalg.unary_fn} ins(%arg0 : tensor) outs(%arg1 : tensor) -> tensor - %dim = tensor.dim %arg1, %c0 : tensor - %dim_0 = tensor.dim %arg1, %c1 : tensor - %dim_1 = tensor.dim %arg1, %c2 : tensor - // CHECK: scf.for {{.*}} iter_args(%[[BBARG0:.*]] = %[[ARG1]]) -> (tensor) { - // CHECK: scf.for {{.*}} iter_args(%[[BBARG1:.*]] = %[[BBARG0]]) -> (tensor) { - // CHECK: scf.for {{.*}} iter_args(%[[BBARG2:.*]] = %[[BBARG1]]) -> (tensor) { - %1 = scf.for %arg2 = %c0 to %dim step %c1 iter_args(%arg3 = %arg1) -> (tensor) { - %2 = scf.for %arg4 = %c0 to %dim_0 step %c1 iter_args(%arg5 = %arg3) -> (tensor) { - %3 = scf.for %arg6 = %c0 to %dim_1 step %c1 iter_args(%arg7 = %arg5) -> (tensor) { - // CHECK: %[[EX1:.*]] = tensor.extract_slice %[[BBARG2]]{{.*}}: tensor to tensor<1x1x1xf32> - // CHECK: linalg.elemwise_unary {fun = #linalg.unary_fn} ins({{.*}} : 
tensor<1x1x1xf32>) outs(%[[EX1]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32> - // CHECK: %[[EX2:.*]] = tensor.extract_slice %[[BBARG2]]{{.*}} : tensor to tensor<1x1x1xf32> - // CHECK: linalg.elemwise_unary {fun = #linalg.unary_fn} ins({{.*}} : tensor<1x1x1xf32>) outs(%[[EX2]] : tensor<1x1x1xf32>) -> tensor<1x1x1xf32> - %extracted_slice = tensor.extract_slice %0[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor to tensor<1x1x1xf32> - %extracted_slice_2 = tensor.extract_slice %arg7[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor to tensor<1x1x1xf32> - %4 = linalg.elemwise_unary {fun = #linalg.unary_fn} ins(%extracted_slice : tensor<1x1x1xf32>) outs(%extracted_slice_2 : tensor<1x1x1xf32>) -> tensor<1x1x1xf32> - %inserted_slice = tensor.insert_slice %4 into %arg7[%arg2, %arg4, %arg6] [1, 1, 1] [1, 1, 1] : tensor<1x1x1xf32> into tensor - scf.yield %inserted_slice : tensor - } - scf.yield %3 : tensor - } - scf.yield %2 : tensor - } - return %1 : tensor - } - - module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.elemwise_unary"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %1 = transform.structured.match ops{["scf.for"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %2:2 = transform.split_handle %0 : (!transform.any_op) -> (!transform.any_op, !transform.any_op) - %3:3 = transform.split_handle %1 : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - transform.structured.fuse_into_containing_op %2#0 into %3#2 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) - transform.yield - } - } -} - -// ----- - -#map0 = affine_map<()[s0, s1] -> (s0 ceildiv s1)> -#map1 = affine_map<(d0)[s0] -> (d0 * s0)> -#map2 = affine_map<(d0)[s0, s1] -> (-(d0 * s1) + s0, s1)> - module { // CHECK-LABEL: func.func @fuse_tileable_multi_output_op // CHECK-SAME: 
%[[CHUNK_SIZE:[0-9a-z]+]]: index diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 6b760a15afd56..299be1296aa66 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -641,9 +641,7 @@ func.func @test_masked_vectorize_dynamic_pad( // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32> // CHECK-DAG: %[[empty:.*]] = tensor.empty(%[[res_d0]], %[[res_d1]]) : tensor // CHECK-DAG: %[[c0_3:.*]] = arith.constant 0 : index - // CHECK-DAG: %[[d2:.*]] = tensor.dim %[[empty]], {{.*}} : tensor - // CHECK-DAG: %[[d3:.*]] = tensor.dim %[[empty]], {{.*}} : tensor - // CHECK: %[[mask_2:.*]] = vector.create_mask %[[d2]], %[[d3]] : vector<2x4xi1> + // CHECK: %[[mask_2:.*]] = vector.create_mask %[[res_d0]], %[[res_d1]] : vector<2x4xi1> // CHECK: %[[masked_write:.*]] = vector.mask %[[mask_2]] { // CHECK-SAME: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_3]], %[[c0_3]]] // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor @@ -802,9 +800,7 @@ func.func @test_vectorize_dynamic_pack(%arg0: tensor, %arg1: tensor -// CHECK-DAG: %[[d2:.*]] = tensor.dim %[[empty]], {{.*}} : tensor -// CHECK-DAG: %[[d3:.*]] = tensor.dim %[[empty]], {{.*}} : tensor -// CHECK: %[[mask_0:.*]] = vector.create_mask %[[d2]], %[[d3]], %[[c16]], %[[c2]] : vector<4x1x16x2xi1> +// CHECK: %[[mask_0:.*]] = vector.create_mask %[[d0]], %[[d1]], %[[c16]], %[[c2]] : vector<4x1x16x2xi1> // CHECK: %[[masked_write:.*]] = vector.mask %[[mask_0]] { // CHECK-SAME: vector.transfer_write %[[transpose]], %[[empty]][%[[c0_2]], %[[c0_2]], %[[c0_2]], %[[c0_2]]] // CHECK-SAME: {in_bounds = [true, true, true, true]} : vector<4x1x16x2xf32>, tensor diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index a9e4af035dbd7..b7e16b7ec35e2 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -802,14 +802,10 @@ func.func @omp_target_data (%if_cond 
: i1, %device : si32, %device_ptr: memref, tensor) map_clauses(always, from) capture(ByRef) -> memref {name = ""} omp.target_data if(%if_cond) device(%device : si32) map_entries(%mapv1 : memref){} - // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%{{.*}} : memref, tensor) map_clauses(close, present, to) capture(ByRef) -> memref {name = ""} - // CHECK: %[[DEV_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : memref, tensor) map_clauses(return_param) capture(ByRef) -> memref {name = ""} - // CHECK: %[[DEV_PTR:.*]] = omp.map.info var_ptr(%{{.*}} : memref, tensor) map_clauses(return_param) capture(ByRef) -> memref {name = ""} - // CHECK: omp.target_data map_entries(%[[MAP_A]] : memref) use_device_addr(%[[DEV_ADDR]] -> %{{.*}} : memref) use_device_ptr(%[[DEV_PTR]] -> %{{.*}} : memref) + // CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref, tensor) map_clauses(close, present, to) capture(ByRef) -> memref {name = ""} + // CHECK: omp.target_data map_entries(%[[MAP_A]] : memref) use_device_addr(%[[VAL_3:.*]] -> %{{.*}} : memref) use_device_ptr(%[[VAL_4:.*]] -> %{{.*}} : memref) %mapv2 = omp.map.info var_ptr(%map1 : memref, tensor) map_clauses(close, present, to) capture(ByRef) -> memref {name = ""} - %device_addrv1 = omp.map.info var_ptr(%device_addr : memref, tensor) map_clauses(return_param) capture(ByRef) -> memref {name = ""} - %device_ptrv1 = omp.map.info var_ptr(%device_ptr : memref, tensor) map_clauses(return_param) capture(ByRef) -> memref {name = ""} - omp.target_data map_entries(%mapv2 : memref) use_device_addr(%device_addrv1 -> %arg0 : memref) use_device_ptr(%device_ptrv1 -> %arg1 : memref) { + omp.target_data map_entries(%mapv2 : memref) use_device_addr(%device_addr -> %arg0 : memref) use_device_ptr(%device_ptr -> %arg1 : memref) { omp.terminator } diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 974f4506a2ef0..99f0850000a16 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ 
b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -450,6 +450,28 @@ func.func @extract_strided_fold_insert(%a: vector<2x8xf32>, %b: vector<1x4xf32>, // ----- +// CHECK-LABEL: transpose_1D_identity +// CHECK-SAME: ([[ARG:%.*]]: vector<4xf32>) +func.func @transpose_1D_identity(%arg : vector<4xf32>) -> vector<4xf32> { + // CHECK-NOT: transpose + %0 = vector.transpose %arg, [0] : vector<4xf32> to vector<4xf32> + // CHECK-NEXT: return [[ARG]] + return %0 : vector<4xf32> +} + +// ----- + +// CHECK-LABEL: transpose_2D_identity +// CHECK-SAME: ([[ARG:%.*]]: vector<4x3xf32>) +func.func @transpose_2D_identity(%arg : vector<4x3xf32>) -> vector<4x3xf32> { + // CHECK-NOT: transpose + %0 = vector.transpose %arg, [0, 1] : vector<4x3xf32> to vector<4x3xf32> + // CHECK-NEXT: return [[ARG]] + return %0 : vector<4x3xf32> +} + +// ----- + // CHECK-LABEL: transpose_3D_identity // CHECK-SAME: ([[ARG:%.*]]: vector<4x3x2xf32>) func.func @transpose_3D_identity(%arg : vector<4x3x2xf32>) -> vector<4x3x2xf32> { diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir index c84aea6609665..7d8daec4dcba7 100644 --- a/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir +++ b/mlir/test/Dialect/Vector/canonicalize/vector-transpose.mlir @@ -1,10 +1,6 @@ // RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s -// This file contains some tests of canonicalizations and foldings involving vector.transpose. - -// +--------------------------------------------------------------------------- -// Tests of FoldTransposeBroadcast: transpose(broadcast) -> broadcast -// +--------------------------------------------------------------------------- +// This file contains some canonicalizations tests involving vector.transpose. 
// CHECK-LABEL: func @transpose_scalar_broadcast1 // CHECK-SAME: (%[[ARG:.+]]: vector<1xf32>) @@ -252,47 +248,3 @@ func.func @negative_transpose_of_shape_cast(%arg : vector<6xi8>) -> vector<2x3xi %1 = vector.transpose %0, [1, 0] : vector<3x2xi8> to vector<2x3xi8> return %1 : vector<2x3xi8> } - -// ----- - -// +----------------------------------- -// Tests of TransposeOp::fold -// +----------------------------------- - -// CHECK-LABEL: transpose_1D_identity -// CHECK-SAME: [[ARG:%.*]]: vector<4xf32> -// CHECK-NEXT: return [[ARG]] -func.func @transpose_1D_identity(%arg : vector<4xf32>) -> vector<4xf32> { - %0 = vector.transpose %arg, [0] : vector<4xf32> to vector<4xf32> - return %0 : vector<4xf32> -} - -// ----- - -// CHECK-LABEL: transpose_2D_identity -// CHECK-SAME: [[ARG:%.*]]: vector<4x3xf32> -// CHECK-NEXT: return [[ARG]] -func.func @transpose_2D_identity(%arg : vector<4x3xf32>) -> vector<4x3xf32> { - %0 = vector.transpose %arg, [0, 1] : vector<4x3xf32> to vector<4x3xf32> - return %0 : vector<4x3xf32> -} - -// ----- - -// CHECK-LABEL: transpose_shape_and_order_preserving -// CHECK-SAME: [[ARG:%.*]]: vector<6x1x1x4xi8> -// CHECK-NEXT: return [[ARG]] -func.func @transpose_shape_and_order_preserving(%arg : vector<6x1x1x4xi8>) -> vector<6x1x1x4xi8> { - %0 = vector.transpose %arg, [0, 2, 1, 3] : vector<6x1x1x4xi8> to vector<6x1x1x4xi8> - return %0 : vector<6x1x1x4xi8> -} - -// ----- - -// CHECK-LABEL: negative_transpose_fold -// CHECK: [[TRANSP:%.*]] = vector.transpose -// CHECK: return [[TRANSP]] -func.func @negative_transpose_fold(%arg : vector<2x2xi8>) -> vector<2x2xi8> { - %0 = vector.transpose %arg, [1, 0] : vector<2x2xi8> to vector<2x2xi8> - return %0 : vector<2x2xi8> -} diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir index 9cbf319ffddb2..01ad1ac48b012 100644 --- a/mlir/test/Dialect/Vector/linearize.mlir +++ b/mlir/test/Dialect/Vector/linearize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file 
-test-vector-linearize -verify-diagnostics | FileCheck %s +// RUN: mlir-opt %s -split-input-file -test-vector-linearize -verify-diagnostics | FileCheck %s // CHECK-LABEL: test_linearize // CHECK-SAME: (%[[ORIG_ARG:.*]]: vector<2x2xf32>) @@ -131,9 +131,9 @@ func.func @test_0d_vector() -> vector { // ----- -// CHECK-LABEL: test_extract_strided_slice_2D +// CHECK-LABEL: test_extract_strided_slice_1 // CHECK-SAME: (%[[ORIG_ARG:.*]]: vector<4x8xf32>) -> vector<2x2xf32> { -func.func @test_extract_strided_slice_2D(%arg0 : vector<4x8xf32>) -> vector<2x2xf32> { +func.func @test_extract_strided_slice_1(%arg0 : vector<4x8xf32>) -> vector<2x2xf32> { // CHECK: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<4x8xf32> to vector<32xf32> // CHECK: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]] @@ -147,13 +147,13 @@ func.func @test_extract_strided_slice_2D(%arg0 : vector<4x8xf32>) -> vector<2x2x // ----- -// CHECK-LABEL: func.func @test_extract_strided_slice_2D_scalable( +// CHECK-LABEL: func.func @test_extract_strided_slice_1_scalable( // CHECK-SAME: %[[VAL_0:.*]]: vector<4x[8]xf32>) -> vector<2x[8]xf32> { -func.func @test_extract_strided_slice_2D_scalable(%arg0: vector<4x[8]xf32>) -> vector<2x[8]xf32> { +func.func @test_extract_strided_slice_1_scalable(%arg0: vector<4x[8]xf32>) -> vector<2x[8]xf32> { // CHECK-NOT: vector.shuffle // CHECK-NOT: vector.shape_cast - // CHECK: %[[RES:.*]] = vector.extract_strided_slice %[[VAL_0]] + // CHECK: %[[RES:.*]] = vector.extract_strided_slice %[[VAL_0]] {offsets = [1, 0], sizes = [2, 8], strides = [1, 1]} : vector<4x[8]xf32> to vector<2x[8]xf32> %0 = vector.extract_strided_slice %arg0 { sizes = [2, 8], strides = [1, 1], offsets = [1, 0] } : vector<4x[8]xf32> to vector<2x[8]xf32> // CHECK: return %[[RES]] : vector<2x[8]xf32> @@ -162,9 +162,9 @@ func.func @test_extract_strided_slice_2D_scalable(%arg0: vector<4x[8]xf32>) -> v // ----- -// CHECK-LABEL: test_extract_strided_slice_3D +// CHECK-LABEL: test_extract_strided_slice_2 // 
CHECK-SAME: (%[[ORIG_ARG:.*]]: vector<2x8x2xf32>) -> vector<1x4x2xf32> { -func.func @test_extract_strided_slice_3D(%arg0 : vector<2x8x2xf32>) -> vector<1x4x2xf32> { +func.func @test_extract_strided_slice_2(%arg0 : vector<2x8x2xf32>) -> vector<1x4x2xf32> { // CHECK: %[[ARG:.*]] = vector.shape_cast %[[ORIG_ARG]] : vector<2x8x2xf32> to vector<32xf32> // CHECK: %[[SHUFFLE:.*]] = vector.shuffle %[[ARG]], %[[ARG]] @@ -178,76 +178,6 @@ func.func @test_extract_strided_slice_3D(%arg0 : vector<2x8x2xf32>) -> vector<1x // ----- -// Test of insert_strided_slice -> shuffle. -// This is a contiguous insertion of 4 elements at offset 6 into a vector of 12 elements. -// CHECK-LABEL: insert_strided_slice_2D_into_4D -func.func @insert_strided_slice_2D_into_4D(%arg0 : vector<2x2xi8>, %arg1 : vector<2x1x3x2xi8>) -> vector<2x1x3x2xi8> { - -// CHECK-DAG: %[[ARG0:.*]] = vector.shape_cast {{.*}} to vector<4xi8> -// CHECK-DAG: %[[ARG1:.*]] = vector.shape_cast {{.*}} to vector<12xi8> -// CHECK: vector.shuffle %[[ARG1]], %[[ARG0]] -// CHECK-SAME: [0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 10, 11] : vector<12xi8>, vector<4xi8> - %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [1, 0, 0, 0], strides = [1, 1]} : vector<2x2xi8> into vector<2x1x3x2xi8> - -// CHECK: %[[RES:.*]] = vector.shape_cast {{.*}} to vector<2x1x3x2xi8> -// CHECK: return %[[RES]] : vector<2x1x3x2xi8> - return %0 : vector<2x1x3x2xi8> -} - -// ----- - -// Test of insert_strided_slice -> shuffle. 
-// [[[0, 1], [2, 3], [4, 5]], [[6, 7], [8, 9], [10, 11]], [[12, 13], [14, 15]], [[16, 17]]] -// ^ ^ -// | | -// where the 2 elements are inserted into the 3x3x2 vector -// CHECK-LABEL: insert_strided_slice_3D -func.func @insert_strided_slice_3D(%arg0 : vector<1x2x1xi8>, %arg1 : vector<3x3x2xi8>) -> vector<3x3x2xi8> { - -// CHECK-DAG: %[[ARG0:.*]] = vector.shape_cast {{.*}} to vector<2xi8> -// CHECK-DAG: %[[ARG1:.*]] = vector.shape_cast {{.*}} to vector<18xi8> -// CHECK: vector.shuffle %[[ARG1]], %[[ARG0]] -// CHECK-SAME: [0, 1, 2, 3, 4, 5, 6, 7, 8, 18, 10, 19, 12, 13, 14, 15, 16, 17] : vector<18xi8>, vector<2xi8> - %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [1, 1, 1], sizes = [1, 2, 1], strides = [1, 1, 1]} : vector<1x2x1xi8> into vector<3x3x2xi8> - -// CHECK: %[[RES:.*]] = vector.shape_cast {{.*}} to vector<3x3x2xi8> -// CHECK: return %[[RES]] : vector<3x3x2xi8> - return %0 : vector<3x3x2xi8> -} - -// ----- - -// CHECK-LABEL: insert_strided_slice_2D_higher_offsets -func.func @insert_strided_slice_2D_higher_offsets(%arg0 : vector<2x1xi8>, %arg1 : vector<2x2xi8>, %arg2 : vector<5x2xi8>) -> vector<5x2xi8> { - - // CHECK: [0, 1, 2, 3, 10, 11, 12, 13, 8, 9] - // ^^^ ^^^ ^^^ ^^^ - // insertion indices - %0 = vector.insert_strided_slice %arg1, %arg2 {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<2x2xi8> into vector<5x2xi8> - - // CHECK: [0, 1, 2, 3, 10, 5, 11, 7, 8, 9] - // ^^^ ^^^ - %1 = vector.insert_strided_slice %arg0, %0 {offsets = [2, 0], sizes = [2, 1], strides = [1, 1]} : vector<2x1xi8> into vector<5x2xi8> - - // CHECK: [0, 1, 2, 3, 4, 5, 6, 10, 8, 11] - // ^^^ ^^^ - %2 = vector.insert_strided_slice %arg0, %1 {offsets = [3, 1], sizes = [2, 1], strides = [1, 1]} : vector<2x1xi8> into vector<5x2xi8> - - return %2 : vector<5x2xi8> -} - -// ----- - -// CHECK-LABEL: negative_insert_strided_slice_scalable -// CHECK-NOT: vector.shuffle -// CHECK: return -func.func @negative_insert_strided_slice_scalable(%arg0 : vector<1x[2]xi8>, %arg1 : 
vector<2x[2]xi8>) -> vector<2x[2]xi8> { - %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [0, 0], strides = [1,1]} : vector<1x[2]xi8> into vector<2x[2]xi8> - return %0 : vector<2x[2]xi8> -} - -// ----- - // CHECK-LABEL: test_vector_shuffle // CHECK-SAME: (%[[ORIG_ARG0:.*]]: vector<4x2xf32>, %[[ORIG_ARG1:.*]]: vector<4x2xf32>) -> vector<8x2xf32> { func.func @test_vector_shuffle(%arg0: vector<4x2xf32>, %arg1: vector<4x2xf32>) -> vector<8x2xf32> { @@ -392,28 +322,6 @@ func.func @test_vector_bitcast(%arg0: vector<[4]x2xf32>) -> vector<[4]x4xf16> { // ----- -// CHECK-LABEL: test_linearize_across_for -func.func @test_linearize_across_for(%arg0 : vector<4xi8>) -> vector<4xi8> { - %0 = vector.shape_cast %arg0 : vector<4xi8> to vector<2x2xi8> - %c0 = arith.constant 0 : index - %c1 = arith.constant 1 : index - %c4 = arith.constant 4 : index - - // CHECK: scf.for {{.*}} -> (vector<4xi8>) - %1 = scf.for %i = %c0 to %c4 step %c1 iter_args(%arg1 = %0) -> (vector<2x2xi8>) { - - // CHECK: arith.addi {{.*}} : vector<4xi8> - %2 = arith.addi %arg1, %0 : vector<2x2xi8> - - // CHECK: scf.yield {{.*}} : vector<4xi8> - scf.yield %2 : vector<2x2xi8> - } - %3 = vector.shape_cast %1 : vector<2x2xi8> to vector<4xi8> - return %3 : vector<4xi8> -} - -// ----- - // CHECK-LABEL: linearize_vector_splat // CHECK-SAME: (%[[ARG:.*]]: i32) -> vector<4x2xi32> func.func @linearize_vector_splat(%arg0: i32) -> vector<4x2xi32> { @@ -436,31 +344,4 @@ func.func @linearize_scalable_vector_splat(%arg0: i32) -> vector<4x[2]xi32> { // CHECK: return %[[CAST]] : vector<4x[2]xi32> %0 = vector.splat %arg0 : vector<4x[2]xi32> return %0 : vector<4x[2]xi32> - -} - -// ----- - -// CHECK-LABEL: linearize_create_mask -// CHECK-SAME: (%[[ARG0:.*]]: index, %[[ARG1:.*]]: index) -> vector<1x16xi1> -func.func @linearize_create_mask(%arg0 : index, %arg1 : index) -> vector<1x16xi1> { - - // CHECK: %[[C0:.*]] = arith.constant 0 : index - // CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[ARG0]], %[[C0]] : index - // CHECK: 
%[[INDEXCAST:.*]] = arith.index_cast %[[CMP]] : i1 to index - // CHECK: %[[MULI:.*]] = arith.andi %[[INDEXCAST]], %[[ARG1]] : index - // CHECK: %[[MASK_1D:.*]] = vector.create_mask %[[MULI]] : vector<16xi1> - // CHECK: %[[CAST:.*]] = vector.shape_cast %[[MASK_1D]] : vector<16xi1> to vector<1x16xi1> - // CHECK: return %[[CAST]] : vector<1x16xi1> - %0 = vector.create_mask %arg0, %arg1 : vector<1x16xi1> - return %0 : vector<1x16xi1> -} - -// ----- -// CHECK-LABEL: linearize_scalable_create_mask -func.func @linearize_scalable_create_mask(%arg0 : index, %arg1 : index) -> vector<1x[16]xi1> { - - // CHECK: %[[MASK_1D:.*]] = vector.create_mask {{%.*}} : vector<[16]xi1> - %0 = vector.create_mask %arg0, %arg1 : vector<1x[16]xi1> - return %0 : vector<1x[16]xi1> } diff --git a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir index a730f217f027d..83395504e8c74 100644 --- a/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir +++ b/mlir/test/Dialect/Vector/vector-transpose-lowering.mlir @@ -65,15 +65,13 @@ func.func @transpose102_8x1x8xf32(%arg0: vector<8x1x8xf32>) -> vector<1x8x8xf32> return %0 : vector<1x8x8xf32> } -// CHECK-LABEL: func @transpose1023_2x1x8x4xf32( -func.func @transpose1023_2x1x8x4xf32(%arg0: vector<2x1x8x4xf32>) -> vector<1x2x8x4xf32> { - // Note the 2-D extract/insert pair since dimensions 2 and 3 are not transposed! 
- // CHECK: vector.extract {{.*}}[0, 0] : vector<8x4xf32> from vector<2x1x8x4xf32> - // CHECK-NEXT: vector.insert {{.*}} [0, 0] : vector<8x4xf32> into vector<1x2x8x4xf32> - // CHECK-NEXT: vector.extract {{.*}}[1, 0] : vector<8x4xf32> from vector<2x1x8x4xf32> - // CHECK-NEXT: vector.insert {{.*}} [0, 1] : vector<8x4xf32> into vector<1x2x8x4xf32> - %0 = vector.transpose %arg0, [1, 0, 2, 3] : vector<2x1x8x4xf32> to vector<1x2x8x4xf32> - return %0 : vector<1x2x8x4xf32> +// CHECK-LABEL: func @transpose1023_1x1x8x8xf32( +func.func @transpose1023_1x1x8x8xf32(%arg0: vector<1x1x8x8xf32>) -> vector<1x1x8x8xf32> { + // Note the single 2-D extract/insert pair since 2 and 3 are not transposed! + // CHECK: vector.extract {{.*}}[0, 0] : vector<8x8xf32> from vector<1x1x8x8xf32> + // CHECK-NEXT: vector.insert {{.*}} [0, 0] : vector<8x8xf32> into vector<1x1x8x8xf32> + %0 = vector.transpose %arg0, [1, 0, 2, 3] : vector<1x1x8x8xf32> to vector<1x1x8x8xf32> + return %0 : vector<1x1x8x8xf32> } /// Scalable dim should not be unrolled. 
diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index 3bb6e38b4d613..bb19cb63b65a5 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -1232,13 +1232,6 @@ func.func @parse_base64_test() { return } -// CHECK-LABEL: func @parse_slash_test -func.func @parse_slash_test() { - // CHECK: "test.slash_attr"() <{attr = #test.slash_attr<1 / 2>}> : () -> () - "test.slash_attr"() { attr = #test.slash_attr<1 / 2> } : () -> () - return -} - // CHECK-LABEL: func @"\22_string_symbol_reference\22" func.func @"\"_string_symbol_reference\""() { // CHECK: ref = @"\22_string_symbol_reference\22" diff --git a/mlir/test/Target/LLVMIR/blockaddress.mlir b/mlir/test/Target/LLVMIR/blockaddress.mlir index 4473f91c4bdb5..fb3d853531122 100644 --- a/mlir/test/Target/LLVMIR/blockaddress.mlir +++ b/mlir/test/Target/LLVMIR/blockaddress.mlir @@ -34,32 +34,3 @@ llvm.func @blockaddr0() -> !llvm.ptr { // CHECK: [[RET]]: // CHECK: ret ptr blockaddress(@blockaddr0, %1) // CHECK: } - -// ----- - -llvm.mlir.global private @h() {addr_space = 0 : i32, dso_local} : !llvm.ptr { - %0 = llvm.blockaddress > : !llvm.ptr - llvm.return %0 : !llvm.ptr -} - -// CHECK: @h = private global ptr blockaddress(@h3, %[[BB_ADDR:.*]]) - -// CHECK: define void @h3() { -// CHECK: br label %[[BB_ADDR]] - -// CHECK: [[BB_ADDR]]: -// CHECK: ret void -// CHECK: } - -// CHECK: define void @h0() - -llvm.func @h3() { - llvm.br ^bb1 -^bb1: - llvm.blocktag - llvm.return -} - -llvm.func @h0() { - llvm.return -} diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td index 4d825e2f0a8cc..d42f469c39c69 100644 --- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td +++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td @@ -418,17 +418,4 @@ def TestOpAsmAttrInterfaceTablegenDefaultAttr : Test_Attr<"TestOpAsmAttrInterfac let genMnemonicAlias = 1; } -// Test attribute containing a slash token -def SlashAttr: Test_Attr<"Slash">{ - let mnemonic = "slash_attr"; - - let parameters 
= ( - ins - "int":$lhs, - "int":$rhs - ); - - let hasCustomAssemblyFormat = 1; -} - #endif // TEST_ATTRDEFS diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index 80661e68754ce..b36f246b83d76 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -497,24 +497,6 @@ getDynamicCustomAssemblyFormatAttr(TestDialect *testDialect) { std::move(parser), std::move(printer)); } -//===----------------------------------------------------------------------===// -// SlashAttr -//===----------------------------------------------------------------------===// - -Attribute SlashAttr::parse(AsmParser &parser, Type type) { - int lhs, rhs; - - if (parser.parseLess() || parser.parseInteger(lhs) || parser.parseSlash() || - parser.parseInteger(rhs) || parser.parseGreater()) - return Attribute(); - - return SlashAttr::get(parser.getContext(), lhs, rhs); -} - -void SlashAttr::print(AsmPrinter &printer) const { - printer << "<" << getLhs() << " / " << getRhs() << ">"; -} - //===----------------------------------------------------------------------===// // TestDialect //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 43a0bdaf86cf3..4848e232472f0 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -324,10 +324,6 @@ def DenseArrayAttrOp : TEST_Op<"dense_array_attr"> { }]; } -def SlashAttrOp : TEST_Op<"slash_attr"> { - let arguments = (ins SlashAttr:$attr); -} - //===----------------------------------------------------------------------===// // Test Attributes Constraints //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index ccba2e2806862..eda2594fbc7c7 
100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -17,7 +17,6 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" #include "mlir/Dialect/SCF/IR/SCF.h" -#include "mlir/Dialect/SCF/Transforms/Patterns.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" @@ -837,6 +836,9 @@ struct TestVectorEmulateMaskedLoadStore final } }; +// TODO: move this code into the user project. +namespace vendor { + /// Get the set of operand/result types to check for sufficiently /// small inner-most dimension size. static SmallVector> @@ -958,6 +960,8 @@ struct TestVectorBitWidthLinearize final } }; +} // namespace vendor + struct TestVectorLinearize final : public PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestVectorLinearize) @@ -969,7 +973,7 @@ struct TestVectorLinearize final return "Linearizes ND vectors for N >= 2 into 1D vectors"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } void runOnOperation() override { @@ -983,8 +987,6 @@ struct TestVectorLinearize final vector::populateVectorLinearizeBasePatterns(converter, target, patterns); vector::populateVectorLinearizeShuffleLikeOpsPatterns(converter, target, patterns); - mlir::scf::populateSCFStructuralTypeConversionsAndLegality( - converter, patterns, target); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) @@ -1065,7 +1067,7 @@ void registerTestVectorLowerings() { PassRegistration(); - PassRegistration(); + PassRegistration(); PassRegistration(); } diff --git a/mlir/test/mlir-runner/verify-entry-point-result.mlir b/mlir/test/mlir-runner/verify-entry-point-result.mlir new file mode 100644 index 0000000000000..ad46e0b5fe1bf --- /dev/null +++ b/mlir/test/mlir-runner/verify-entry-point-result.mlir @@ -0,0 
+1,7 @@ +// RUN: not mlir-runner %s -e entry -entry-point-result=void 2>&1 | FileCheck %s + +// CHECK: Error: expected void function +llvm.func @entry() -> (i32) { + %0 = llvm.mlir.constant(0 : index) : i32 + llvm.return %0 : i32 +} diff --git a/mlir/test/mlir-runner/verify-entry-point.mlir b/mlir/test/mlir-runner/verify-entry-point.mlir deleted file mode 100644 index c7165bd46302f..0000000000000 --- a/mlir/test/mlir-runner/verify-entry-point.mlir +++ /dev/null @@ -1,48 +0,0 @@ -// RUN: not mlir-runner %s -e entry_point_void -entry-point-result=void 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-POINT-VOID -// RUN: not mlir-runner %s -e entry_inputs_void -entry-point-result=void 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-INPUTS-VOID -// RUN: not mlir-runner %s -e entry_result_void -entry-point-result=void 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-RESULT-VOID -// RUN: not mlir-runner %s -e entry_point_i32 -entry-point-result=i32 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-POINT-I32 -// RUN: not mlir-runner %s -e entry_inputs_i32 -entry-point-result=i32 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-INPUTS-I32 -// RUN: not mlir-runner %s -e entry_result_i32 -entry-point-result=i32 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-RESULT-I32 -// RUN: not mlir-runner %s -e entry_result_i64 -entry-point-result=i64 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-RESULT-I64 -// RUN: not mlir-runner %s -e entry_result_f32 -entry-point-result=f32 2>&1 | FileCheck %s --check-prefix=CHECK-ENTRY-RESULT-F32 - -// CHECK-ENTRY-POINT-VOID: Error: entry point not found -llvm.func @entry_point_void() -> () - -// CHECK-ENTRY-INPUTS-VOID: Error: JIT can't invoke a main function expecting arguments -llvm.func @entry_inputs_void(%arg0: i32) { - llvm.return -} - -// CHECK-ENTRY-RESULT-VOID: Error: expected void function -llvm.func @entry_result_void() -> (i32) { - %0 = llvm.mlir.constant(0 : index) : i32 - llvm.return %0 : i32 -} - -// CHECK-ENTRY-POINT-I32: Error: entry point 
not found -llvm.func @entry_point_i32() -> (i32) - -// CHECK-ENTRY-INPUTS-I32: Error: JIT can't invoke a main function expecting arguments -llvm.func @entry_inputs_i32(%arg0: i32) { - llvm.return -} - -// CHECK-ENTRY-RESULT-I32: Error: only single i32 function result supported -llvm.func @entry_result_i32() -> (i64) { - %0 = llvm.mlir.constant(0 : index) : i64 - llvm.return %0 : i64 -} - -// CHECK-ENTRY-RESULT-I64: Error: only single i64 function result supported -llvm.func @entry_result_i64() -> (i32) { - %0 = llvm.mlir.constant(0 : index) : i32 - llvm.return %0 : i32 -} - -// CHECK-ENTRY-RESULT-F32: Error: only single f32 function result supported -llvm.func @entry_result_f32() -> (i32) { - %0 = llvm.mlir.constant(0 : index) : i32 - llvm.return %0 : i32 -} diff --git a/mlir/test/mlir-tblgen/cpp-class-comments.td b/mlir/test/mlir-tblgen/cpp-class-comments.td deleted file mode 100644 index a896888d944b2..0000000000000 --- a/mlir/test/mlir-tblgen/cpp-class-comments.td +++ /dev/null @@ -1,139 +0,0 @@ -// RUN: mlir-tblgen -gen-dialect-decls -I %S/../../include %s | FileCheck %s --check-prefix=DIALECT -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP -// RUN: mlir-tblgen -gen-typedef-decls -I %S/../../include %s | FileCheck %s --check-prefix=TYPE -// RUN: mlir-tblgen -gen-attrdef-decls -I %S/../../include %s | FileCheck %s --check-prefix=ATTR -// RUN: mlir-tblgen -gen-attr-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=ATTR-INTERFACE -// RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP-INTERFACE -// RUN: mlir-tblgen -gen-type-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=TYPE-INTERFACE -// RUN: mlir-tblgen -gen-enum-decls -I %S/../../include %s | FileCheck %s --check-prefix=ENUM - -include "mlir/IR/AttrTypeBase.td" -include "mlir/IR/EnumAttr.td" -include "mlir/IR/OpBase.td" - -// check dialect with summary and description -def A_Dialect 
: Dialect { - let name = "a"; - let cppNamespace = ""; - - let summary = "This is a summary"; - let description = [{ - - This is a description, needs trimming - - }]; -// DIALECT: /// This is a summary -// DIALECT-NEXT: /// This is a description, needs trimming -// DIALECT-NEXT: class ADialect : public ::mlir::Dialect { -} - -def A_SomeOp1 : Op{ - let summary = "Some Op1 summary line1 \nsummary line2"; - - let description = [{ - Some Op1 description - }]; - - let cppNamespace = "OP1"; -// OP: namespace OP1 -// OP-NEXT: /// Some Op1 summary line1 -// OP-NEXT: /// summary line2 -// OP-NEXT: /// Some Op1 description -// OP-NEXT: class SomeOp1; -} - -// test weird characters in description -def A_SomeOp2 : Op{ - let summary = ""; - - let description = [{ - $ptr (`,` $mask^)? (`,` $other^)? - oilist( - `a` `=` $1 | `b` `=` $2 - ) - }]; -// OP: /// $ptr (`,` $mask^)? (`,` $other^)? -// OP-NEXT: /// oilist( -// OP-NEXT: /// `a` `=` $1 | `b` `=` $2 -// OP-NEXT: /// ) -// OP-NEXT: class SomeOp2; -} - -def A_TensorType : TypeDef { - let typeName = "a.simple_a_tensor"; - - let summary = "Tensor Type A summary"; - - let description = [{ - Tensor Type A description - }]; - - let extraClassDeclaration = [{ - void getSignlessBlockType() const { - } - }]; -// TYPE: /// Tensor Type A summary -// TYPE-NEXT: /// Tensor Type A description -// TYPE-NEXT: class TensorType; -} - -def A_SimpleAttr : AttrDef { - let attrName = "a.simple_attr"; - let summary = "Simple Attr A summary"; - - let description = [{ - Simple Attr A description - }]; -// ATTR: /// Simple Attr A summary -// ATTR-NEXT: /// Simple Attr A description -// ATTR-NEXT: class SimpleAAttr; -} - -def EncodingTrait : AttrInterface<"EncodingTrait"> { - let cppNamespace = "mlir::a::traits"; - let description = [{ - Common trait for all layouts. 
- }]; - let methods = [ - ]; -// ATTR-INTERFACE: namespace mlir -// ATTR-INTERFACE-NEXT: namespace a -// ATTR-INTERFACE-NEXT: namespace traits -// ATTR-INTERFACE-NEXT: /// Common trait for all layouts. -// ATTR-INTERFACE-NEXT: class EncodingTrait; -} - -def SimpleEncodingTrait : AttrInterface<"SimpleEncodingTrait"> { - let cppNamespace = "a::traits"; -// ATTR-INTERFACE: namespace a { -// ATTR-INTERFACE-NEXT: namespace traits { -// ATTR-INTERFACE-NEXT: class SimpleEncodingTrait; -} - -def SimpleOpInterface : OpInterface<"SimpleOpInterface"> { - let cppNamespace = "a::traits"; - let description = [{ - - Simple Op Interface description - }]; -// OP-INTERFACE: namespace a { -// OP-INTERFACE-NEXT: namespace traits { -// OP-INTERFACE-NEXT: /// Simple Op Interface description -// OP-INTERFACE-NEXT: class SimpleOpInterface; -} - -def SimpleTypeInterface : TypeInterface<"SimpleTypeInterface"> { - let description = [{ - Simple Type Interface description - }]; -// TYPE-INTERFACE: /// Simple Type Interface description -// TYPE-INTERFACE-NEXT: class SimpleTypeInterface; -} - -def MyBitEnum: I32BitEnumAttr<"MyBitEnum", "An example bit enum", - [I32BitEnumCaseBit<"Bit0", 0, "tagged">, - I32BitEnumCaseBit<"Bit1", 1>]> { - let genSpecializedAttr = 0; -// ENUM: // An example bit enum -// ENUM-NEXT: enum class MyBitEnum -} diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index 2a6071602fa49..05686c0539754 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "AttrOrTypeFormatGen.h" -#include "CppGenUtilities.h" #include "mlir/TableGen/AttrOrTypeDef.h" #include "mlir/TableGen/Class.h" #include "mlir/TableGen/CodeGenHelpers.h" @@ -814,14 +813,8 @@ bool DefGenerator::emitDecls(StringRef selectedDialect) { NamespaceEmitter nsEmitter(os, defs.front().getDialect()); // Declare all 
the def classes first (in case they reference each other). - for (const AttrOrTypeDef &def : defs) { - std::string comments = tblgen::emitSummaryAndDescComments( - def.getSummary(), def.getDescription()); - if (!comments.empty()) { - os << comments << "\n"; - } + for (const AttrOrTypeDef &def : defs) os << "class " << def.getCppClassName() << ";\n"; - } // Emit the declarations. for (const AttrOrTypeDef &def : defs) diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt index 2a7ef7e0576c8..9431c59860522 100644 --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -33,7 +33,6 @@ add_tablegen(mlir-tblgen MLIR RewriterGen.cpp SPIRVUtilsGen.cpp TosaUtilsGen.cpp - CppGenUtilities.cpp ) target_link_libraries(mlir-tblgen diff --git a/mlir/tools/mlir-tblgen/CppGenUtilities.cpp b/mlir/tools/mlir-tblgen/CppGenUtilities.cpp deleted file mode 100644 index ebca20cc685f4..0000000000000 --- a/mlir/tools/mlir-tblgen/CppGenUtilities.cpp +++ /dev/null @@ -1,39 +0,0 @@ -//===- CppGenUtilities.cpp - MLIR cpp gen utilities --------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines common utilities for generating cpp files from tablegen -// structures. 
-// -//===----------------------------------------------------------------------===// - -#include "CppGenUtilities.h" -#include "mlir/Support/IndentedOstream.h" - -std::string -mlir::tblgen::emitSummaryAndDescComments(llvm::StringRef summary, - llvm::StringRef description) { - - std::string comments = ""; - StringRef trimmedSummary = summary.trim(); - StringRef trimmedDesc = description.trim(); - llvm::raw_string_ostream os(comments); - raw_indented_ostream ros(os); - - if (!trimmedSummary.empty()) { - ros.printReindented(trimmedSummary, "/// "); - } - - if (!trimmedDesc.empty()) { - if (!trimmedSummary.empty()) { - // If there is a summary, add a newline after it. - ros << "\n"; - } - ros.printReindented(trimmedDesc, "/// "); - } - return comments; -} diff --git a/mlir/tools/mlir-tblgen/CppGenUtilities.h b/mlir/tools/mlir-tblgen/CppGenUtilities.h deleted file mode 100644 index 231c59a9e148f..0000000000000 --- a/mlir/tools/mlir-tblgen/CppGenUtilities.h +++ /dev/null @@ -1,29 +0,0 @@ -//===- CppGenUtilities.h - MLIR cpp gen utilities ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines common utilities for generating cpp files from tablegen -// structures. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_TOOLS_MLIRTBLGEN_CPPGENUTILITIES_H_ -#define MLIR_TOOLS_MLIRTBLGEN_CPPGENUTILITIES_H_ - -#include "llvm/ADT/StringRef.h" - -namespace mlir { -namespace tblgen { - -// Emit the summary and description as a C++ comment, perperly aligned placed -// adjacent to the class declaration of generated classes. 
-std::string emitSummaryAndDescComments(llvm::StringRef summary, - llvm::StringRef description); -} // namespace tblgen -} // namespace mlir - -#endif // MLIR_TOOLS_MLIRTBLGEN_CPPGENUTILITIES_H_ diff --git a/mlir/tools/mlir-tblgen/DialectGen.cpp b/mlir/tools/mlir-tblgen/DialectGen.cpp index 02941ec1268cb..6cf71d2bb0174 100644 --- a/mlir/tools/mlir-tblgen/DialectGen.cpp +++ b/mlir/tools/mlir-tblgen/DialectGen.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "CppGenUtilities.h" #include "DialectGenUtilities.h" #include "mlir/TableGen/Class.h" #include "mlir/TableGen/CodeGenHelpers.h" @@ -109,9 +108,7 @@ tblgen::findDialectToGenerate(ArrayRef dialects) { /// {0}: The name of the dialect class. /// {1}: The dialect namespace. /// {2}: The dialect parent class. -/// {3}: The summary and description comments. static const char *const dialectDeclBeginStr = R"( -{3} class {0} : public ::mlir::{2} { explicit {0}(::mlir::MLIRContext *context); @@ -248,11 +245,8 @@ static void emitDialectDecl(Dialect &dialect, raw_ostream &os) { std::string cppName = dialect.getCppClassName(); StringRef superClassName = dialect.isExtensible() ? "ExtensibleDialect" : "Dialect"; - - std::string comments = tblgen::emitSummaryAndDescComments( - dialect.getSummary(), dialect.getDescription()); os << llvm::formatv(dialectDeclBeginStr, cppName, dialect.getName(), - superClassName, comments); + superClassName); // If the dialect requested the default attribute printer and parser, emit // the declarations for the hooks. 
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 373d3762cbb1a..3f397f3a8e6fd 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "CppGenUtilities.h" #include "OpClass.h" #include "OpFormatGen.h" #include "OpGenHelpers.h" @@ -2641,7 +2640,8 @@ void OpEmitter::genSeparateArgParamBuilder() { // Avoid emitting "resultTypes.size() >= 0u" which is always true. if (!hasVariadicResult || numNonVariadicResults != 0) - body << " " << "assert(resultTypes.size() " + body << " " + << "assert(resultTypes.size() " << (hasVariadicResult ? ">=" : "==") << " " << numNonVariadicResults << "u && \"mismatched number of results\");\n"; @@ -4749,11 +4749,6 @@ static void emitOpClassDecls(const RecordKeeper &records, for (auto *def : defs) { Operator op(*def); NamespaceEmitter emitter(os, op.getCppNamespace()); - std::string comments = tblgen::emitSummaryAndDescComments( - op.getSummary(), op.getDescription()); - if (!comments.empty()) { - os << comments << "\n"; - } os << "class " << op.getCppClassName() << ";\n"; } diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp index 4dfa1908b3267..dcd68e6c2d636 100644 --- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp +++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "CppGenUtilities.h" #include "DocGenUtilities.h" #include "mlir/TableGen/Format.h" #include "mlir/TableGen/GenInfo.h" @@ -528,11 +527,6 @@ void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) { // Emit a forward declaration of the interface class so that it becomes usable // in the signature of its methods. 
- std::string comments = tblgen::emitSummaryAndDescComments( - "", interface.getDescription().value_or("")); - if (!comments.empty()) { - os << comments << "\n"; - } os << "class " << interfaceName << ";\n"; // Emit the traits struct containing the concept and model declarations. @@ -595,8 +589,7 @@ void InterfaceGenerator::emitInterfaceDecl(const Interface &interface) { << " auto* interface = getInterfaceFor(base);\n" << " if (!interface)\n" " return false;\n" - " " - << interfaceName << " odsInterfaceInstance(base, interface);\n" + " " << interfaceName << " odsInterfaceInstance(base, interface);\n" << " " << tblgen::tgfmt(extraClassOf->trim(), &extraClassOfFmt) << "\n }\n"; } diff --git a/offload/test/offloading/gpupgo/pgo_device_only.c b/offload/test/offloading/gpupgo/pgo1.c similarity index 85% rename from offload/test/offloading/gpupgo/pgo_device_only.c rename to offload/test/offloading/gpupgo/pgo1.c index 2939af613b6dd..1159858c51218 100644 --- a/offload/test/offloading/gpupgo/pgo_device_only.c +++ b/offload/test/offloading/gpupgo/pgo1.c @@ -23,12 +23,10 @@ int test2(int a) { return a * 2; } int main() { int m = 2; #pragma omp target - { - for (int i = 0; i < 10; i++) { - m = test1(m); - for (int j = 0; j < 2; j++) { - m = test2(m); - } + for (int i = 0; i < 10; i++) { + m = test1(m); + for (int j = 0; j < 2; j++) { + m = test2(m); } } } @@ -36,7 +34,7 @@ int main() { // LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: // LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} // LLVM-PGO: Counters: 4 -// LLVM-PGO: Block counts: [20, 10, {{.*}}, 1] +// LLVM-PGO: Block counts: [20, 10, 2, 1] // LLVM-PGO-LABEL: test1: // LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} @@ -55,10 +53,14 @@ int main() { // LLVM-PGO-SAME: 3 // LLVM-PGO-LABEL: Maximum function count: // LLVM-PGO-SAME: 20 +// LLVM-PGO-LABEL: Maximum internal block count: +// LLVM-PGO-SAME: 10 // CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: // CLANG-PGO: Hash: 
{{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Block counts: [10, 20] +// CLANG-PGO: Counters: 3 +// CLANG-PGO: Function count: 0 +// CLANG-PGO: Block counts: [11, 20] // CLANG-PGO-LABEL: test1: // CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} @@ -76,5 +78,7 @@ int main() { // CLANG-PGO-SAME: Front-end // CLANG-PGO-LABEL: Functions shown: // CLANG-PGO-SAME: 3 +// CLANG-PGO-LABEL: Maximum function count: +// CLANG-PGO-SAME: 20 // CLANG-PGO-LABEL: Maximum internal block count: // CLANG-PGO-SAME: 20 diff --git a/offload/test/offloading/gpupgo/pgo_device_and_host.c b/offload/test/offloading/gpupgo/pgo2.c similarity index 95% rename from offload/test/offloading/gpupgo/pgo_device_and_host.c rename to offload/test/offloading/gpupgo/pgo2.c index 3e95791ce9a50..af3ad9e4a6c19 100644 --- a/offload/test/offloading/gpupgo/pgo_device_and_host.c +++ b/offload/test/offloading/gpupgo/pgo2.c @@ -59,10 +59,8 @@ int main() { int device_var = 1; #pragma omp target - { - for (int i = 0; i < 10; i++) { - device_var *= i; - } + for (int i = 0; i < 10; i++) { + device_var *= i; } } @@ -80,7 +78,7 @@ int main() { // LLVM-DEVICE-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: // LLVM-DEVICE: Hash: {{0[xX][0-9a-fA-F]+}} // LLVM-DEVICE: Counters: 3 -// LLVM-DEVICE: Block counts: [10, {{.*}}, 1] +// LLVM-DEVICE: Block counts: [10, 2, 1] // LLVM-DEVICE: Instrumentation level: IR // CLANG-HOST-LABEL: main: @@ -99,5 +97,6 @@ int main() { // CLANG-DEV-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: // CLANG-DEV: Hash: {{0[xX][0-9a-fA-F]+}} // CLANG-DEV: Counters: 2 -// CLANG-DEV: Block counts: [10] +// CLANG-DEV: Function count: 0 +// CLANG-DEV: Block counts: [11] // CLANG-DEV: Instrumentation level: Front-end diff --git a/offload/test/offloading/gpupgo/pgo_atomic_teams.c b/offload/test/offloading/gpupgo/pgo_atomic_teams.c deleted file mode 100644 index 7bf3b1c11f28b..0000000000000 --- a/offload/test/offloading/gpupgo/pgo_atomic_teams.c +++ /dev/null @@ -1,102 +0,0 @@ -// RUN: 
%libomptarget-compile-generic -fcreate-profile \ -// RUN: -Xarch_device -fprofile-generate \ -// RUN: -Xarch_device -fprofile-update=atomic -// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \ -// RUN: %libomptarget-run-generic 2>&1 -// RUN: llvm-profdata show --all-functions --counts \ -// RUN: %target_triple.%basename_t.llvm.profraw | \ -// RUN: %fcheck-generic --check-prefix="LLVM-PGO" - -// RUN: %libomptarget-compile-generic -fcreate-profile \ -// RUN: -Xarch_device -fprofile-instr-generate \ -// RUN: -Xarch_device -fprofile-update=atomic -// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \ -// RUN: %libomptarget-run-generic 2>&1 -// RUN: llvm-profdata show --all-functions --counts \ -// RUN: %target_triple.%basename_t.clang.profraw | \ -// RUN: %fcheck-generic --check-prefix="CLANG-PGO" - -// REQUIRES: gpu -// REQUIRES: pgo - -int test1(int a) { return a / 2; } -int test2(int a) { return a * 2; } - -int main() { - int device_var = 1; - -#pragma omp target teams distribute parallel for num_teams(3) \ - map(tofrom : device_var) - for (int i = 1; i <= 30; i++) { - device_var *= i; - if (i % 2 == 0) { - device_var += test1(device_var); - } - if (i % 3 == 0) { - device_var += test2(device_var); - } - } -} - -// clang-format off -// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 2 -// LLVM-PGO: Block counts: [0, {{.*}}] - -// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 4 -// LLVM-PGO: Block counts: [{{.*}}, 0, {{.*}}, 0] - -// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined_omp_outlined: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 4 -// LLVM-PGO: Block counts: [30, 15, 10, {{.*}}] - -// LLVM-PGO-LABEL: test1: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 1 -// LLVM-PGO: Block 
counts: [15] - -// LLVM-PGO-LABEL: test2: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 1 -// LLVM-PGO: Block counts: [10] - -// LLVM-PGO-LABEL: Instrumentation level: -// LLVM-PGO-SAME: IR - -// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: {{.*}} -// CLANG-PGO: Block counts: [] - -// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: {{.*}} -// CLANG-PGO: Block counts: [] - -// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined_omp_outlined: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 4 -// CLANG-PGO: Function count: 30 -// CLANG-PGO: Block counts: [{{.*}}, 15, 10] - -// CLANG-PGO-LABEL: test1: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: 15 -// CLANG-PGO: Block counts: [] - -// CLANG-PGO-LABEL: test2: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: 10 -// CLANG-PGO: Block counts: [] - -// CLANG-PGO-LABEL: Instrumentation level: -// CLANG-PGO-SAME: Front-end -// clang-format on diff --git a/offload/test/offloading/gpupgo/pgo_atomic_threads.c b/offload/test/offloading/gpupgo/pgo_atomic_threads.c deleted file mode 100644 index f0e7111f7a64b..0000000000000 --- a/offload/test/offloading/gpupgo/pgo_atomic_threads.c +++ /dev/null @@ -1,84 +0,0 @@ -// RUN: %libomptarget-compile-generic -fcreate-profile \ -// RUN: -Xarch_device -fprofile-generate \ -// RUN: -Xarch_device -fprofile-update=atomic -// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \ -// RUN: %libomptarget-run-generic 2>&1 -// RUN: llvm-profdata show --all-functions --counts \ -// RUN: %target_triple.%basename_t.llvm.profraw | \ -// RUN: 
%fcheck-generic --check-prefix="LLVM-PGO" - -// RUN: %libomptarget-compile-generic -fcreate-profile \ -// RUN: -Xarch_device -fprofile-instr-generate \ -// RUN: -Xarch_device -fprofile-update=atomic -// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \ -// RUN: %libomptarget-run-generic 2>&1 -// RUN: llvm-profdata show --all-functions --counts \ -// RUN: %target_triple.%basename_t.clang.profraw | \ -// RUN: %fcheck-generic --check-prefix="CLANG-PGO" - -// REQUIRES: gpu -// REQUIRES: pgo - -int test1(int a) { return a / 2; } - -int main() { - int device_var = 1; -#pragma omp target map(tofrom : device_var) - { -#pragma omp parallel for - for (int i = 1; i <= 10; i++) { - device_var *= i; - if (i % 2 == 0) { - device_var += test1(device_var); - } - } - } -} - -// clang-format off -// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 2 -// LLVM-PGO: Block counts: [0, {{.*}}] - -// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 5 -// LLVM-PGO: Block counts: [10, 5, {{.*}}, 10, {{.*}}] - -// LLVM-PGO-LABEL: test1: -// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// LLVM-PGO: Counters: 1 -// LLVM-PGO: Block counts: [5] - -// LLVM-PGO-LABEL: Instrumentation level: -// LLVM-PGO-SAME: IR -// LLVM-PGO-SAME: entry_first = 0 -// LLVM-PGO-LABEL: Functions shown: -// LLVM-PGO-SAME: 3 -// LLVM-PGO-LABEL: Maximum function count: -// LLVM-PGO-SAME: 10 - -// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: {{.*}} -// CLANG-PGO: Block counts: [] - -// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}_omp_outlined: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 3 -// CLANG-PGO: Function count: {{.*}} -// CLANG-PGO: Block 
counts: [{{.*}}, 5] - -// CLANG-PGO-LABEL: test1: -// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}} -// CLANG-PGO: Counters: 1 -// CLANG-PGO: Function count: 5 -// CLANG-PGO: Block counts: [] - -// CLANG-PGO-LABEL: Instrumentation level: -// CLANG-PGO-SAME: Front-end -// CLANG-PGO-LABEL: Functions shown: -// CLANG-PGO-SAME: 3 -// clang-format on diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h index 0837cc5dfefcd..4dd1db4c4225b 100644 --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -311,14 +311,6 @@ ompt_label_##id: printf("%" PRIu64 ": current_address=%p or %p or %p\n", \ ompt_get_thread_data()->value, ((char *)addr) - 2, \ ((char *)addr) - 8, ((char *)addr) - 12) -#elif KMP_ARCH_SPARC -// FIXME: Need to distinguish between 32 and 64-bit SPARC? -// On SPARC the NOP instruction is 4 bytes long. -// FIXME: Explain. Can use __builtin_frob_return_addr? -#define print_possible_return_addresses(addr) \ - printf("%" PRIu64 ": current_address=%p or %p\n", \ - ompt_get_thread_data()->value, ((char *)addr) - 12, \ - (char *)addr - 20) #else #error Unsupported target architecture, cannot determine address offset! #endif diff --git a/third-party/unittest/googletest/README.LLVM b/third-party/unittest/googletest/README.LLVM index 56715cff9a73d..b574c7f98be41 100644 --- a/third-party/unittest/googletest/README.LLVM +++ b/third-party/unittest/googletest/README.LLVM @@ -19,6 +19,3 @@ Modified as follows: * Added StringRef support to include/gtest/internal/custom/gtest-printers.h. * Added LLVM printable value support to include/gtest/gtest-message.h and include/gtest/gtest-printers.h. -* Modified `PrintTo(char16_t c, ::std::ostream* os)` and - `PrintTo(char16_t c, ::std::ostream* os)` in include/gtest/gtest-printers.h. 
- to work around https://github.com/google/googletest/issues/4762 diff --git a/third-party/unittest/googletest/include/gtest/gtest-printers.h b/third-party/unittest/googletest/include/gtest/gtest-printers.h index 409b135fc2141..d0da9bc1843ce 100644 --- a/third-party/unittest/googletest/include/gtest/gtest-printers.h +++ b/third-party/unittest/googletest/include/gtest/gtest-printers.h @@ -510,15 +510,11 @@ GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os); GTEST_API_ void PrintTo(char32_t c, ::std::ostream* os); inline void PrintTo(char16_t c, ::std::ostream* os) { - // FIXME: the cast from char16_t to char32_t may be incorrect - // for a lone surrogate - PrintTo(static_cast(c), os); + PrintTo(ImplicitCast_(c), os); } #ifdef __cpp_lib_char8_t inline void PrintTo(char8_t c, ::std::ostream* os) { - // FIXME: the cast from char8_t to char32_t may be incorrect - // for c > 0x7F - PrintTo(static_cast(c), os); + PrintTo(ImplicitCast_(c), os); } #endif diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 221c1c308062d..5bf13d9a6b657 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -1085,7 +1085,6 @@ cc_library( "//mlir:NVGPUDialect", "//mlir:Pass", "//mlir:SCFDialect", - "//mlir:SCFTransforms", "//mlir:Support", "//mlir:TensorDialect", "//mlir:TransformUtils",