diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/assert-side-effect.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/assert-side-effect.rst index 1355afae92e4f..3ca712b958d04 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/assert-side-effect.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/assert-side-effect.rst @@ -15,6 +15,7 @@ Options .. option:: AssertMacros A comma-separated list of the names of assert macros to be checked. + Default is `assert,NSAssert,NSCAssert`. .. option:: CheckFunctionCalls diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/capturing-this-in-member-variable.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/capturing-this-in-member-variable.rst index bb75e9239d9b5..b09d7d5fce959 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/capturing-this-in-member-variable.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/capturing-this-in-member-variable.rst @@ -32,6 +32,9 @@ Possible fixes: object types. - passing ``this`` pointer as parameter +Options +------- + .. option:: FunctionWrapperTypes A semicolon-separated list of names of types. Used to specify function diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst index 72860e8cf2a1d..4edbad5eac81b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst @@ -104,13 +104,16 @@ so both arguments will have the same type. return false; } +Options +------- + .. option:: CharTypdefsToIgnore A semicolon-separated list of typedef names. In this list, we can list typedefs for ``char`` or ``signed char``, which will be ignored by the check. This is useful when a typedef introduces an integer alias like ``sal_Int8`` or ``int8_t``. In this case, human misinterpretation is not - an issue. + an issue. 
Default is an empty string. .. option:: DiagnoseSignedUnsignedCharComparisons diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-enum-usage.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-enum-usage.rst index e87172414a23e..94f29ee11ee39 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-enum-usage.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-enum-usage.rst @@ -71,6 +71,7 @@ Examples: Options ------- + .. option:: StrictMode Default value: 0. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst index 9b38d83601810..de10da21e8442 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-stringview-data-usage.rst @@ -43,6 +43,9 @@ lead to a compilation error due to the explicit nature of the ``std::string`` constructor. Consequently, developers might opt for ``sv.data()`` to resolve the compilation error, albeit introducing potential hazards as discussed. +Options +------- + .. option:: StringViewTypes Option allows users to specify custom string view-like types for analysis. It diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/too-small-loop-variable.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/too-small-loop-variable.rst index 2c3ded952aa02..077abf0af6880 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/too-small-loop-variable.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/too-small-loop-variable.rst @@ -32,6 +32,9 @@ It's recommended to enable the compiler warning `-Wtautological-constant-out-of-range-compare` as well, since check does not inspect compile-time constant loop boundaries to avoid overlaps with the warning. +Options +------- + .. 
option:: MagnitudeBitsUpperLimit Upper limit for the magnitude bits of the loop variable. If it's set the check diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst index d3cdd5a12fdca..3a6245d2fe35b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst @@ -118,6 +118,9 @@ temporary object into ``this`` (needs a move assignment operator): } }; +Options +------- + .. option:: WarnOnlyIfThisHasSuspiciousField When `true`, the check will warn only if the container class of the copy diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unintended-char-ostream-output.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unintended-char-ostream-output.rst index ea1051847129b..95d02b3e2ddda 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unintended-char-ostream-output.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unintended-char-ostream-output.rst @@ -39,6 +39,9 @@ Or cast to char to explicitly indicate that output should be a character. std::cout << static_cast(v); +Options +------- + .. option:: CastTypeName When `CastTypeName` is specified, the fix-it will use `CastTypeName` as the diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-bind.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-bind.rst index 10374daecb660..64e7e95db8800 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-bind.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-bind.rst @@ -50,8 +50,9 @@ Options of every placeholder parameter list. 
 Without this, it is possible for a fix-it to perform an incorrect transformation in the case where the result of the ``bind`` is used in the context of a type erased functor such as ``std::function`` which - allows mismatched arguments. For example: + allows mismatched arguments. Default is `false`. +For example: .. code-block:: c++ diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst index 6a386ecd0fd4b..b7a87bf23967b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/avoid-c-arrays.rst @@ -62,6 +62,9 @@ Similarly, the ``main()`` function is ignored. Its second and third parameters can be either ``char* argv[]`` or ``char** argv``, but cannot be ``std::array<>``. +Options +------- + .. option:: AllowStringArrays When set to `true` (default is `false`), variables of character array type diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/loop-convert.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/loop-convert.rst index 0c423edca1822..3f4783e220501 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/loop-convert.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/loop-convert.rst @@ -140,6 +140,9 @@ however the check can be configured to work without C++20 by specifying a function to reverse a range and optionally the header file where that function lives. +Options +------- + .. option:: UseCxx20ReverseRanges When set to true convert loops when in C++20 or later mode using diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst index 9c1fceaa06000..982138fc5e781 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/make-shared.rst @@ -51,6 +51,6 @@ Options .. 
option:: IgnoreDefaultInitialization - If set to non-zero, the check does not suggest edits that will transform + If set to `false`, the check does not suggest edits that will transform default initialization into value initialization, as this can cause - performance regressions. Default is `1`. + performance regressions. Default is `true`. diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/make-unique.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/make-unique.rst index cd474d352bac0..1aaa8701cd0f1 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/make-unique.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/make-unique.rst @@ -51,6 +51,6 @@ Options .. option:: IgnoreDefaultInitialization - If set to non-zero, the check does not suggest edits that will transform + If set to `false`, the check does not suggest edits that will transform default initialization into value initialization, as this can cause - performance regressions. Default is `1`. + performance regressions. Default is `true`. diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/raw-string-literal.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/raw-string-literal.rst index 6d7589a0011bb..66e50e80fa70b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/raw-string-literal.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/raw-string-literal.rst @@ -45,6 +45,9 @@ An escaped horizontal tab, form feed, or vertical tab prevents the string literal from being converted. The presence of a horizontal tab, form feed or vertical tab in source code is not visually obvious. +Options +------- + .. option:: DelimiterStem Custom delimiter to escape characters in raw string literals. 
It is used in diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-emplace.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-emplace.rst index f61b93aac7c76..e020ece296475 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/use-emplace.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/use-emplace.rst @@ -163,7 +163,8 @@ Options Semicolon-separated list of containers without their template parameters and some ``emplace``-like method of the container. Example: ``vector::emplace_back``. Those methods will be checked for improper use and - the check will report when a temporary is unnecessarily created. + the check will report when a temporary is unnecessarily created. All STL + containers with such member functions are supported by default. Example ^^^^^^^ diff --git a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py index 5e39c05f76d86..93c49566a90e3 100755 --- a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py +++ b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py @@ -48,15 +48,16 @@ import re import subprocess import sys +from typing import List, Tuple -def write_file(file_name, text): +def write_file(file_name: str, text: str) -> None: with open(file_name, "w", encoding="utf-8") as f: f.write(text) f.truncate() -def try_run(args, raise_error=True): +def try_run(args: List[str], raise_error: bool = True) -> str: try: process_output = subprocess.check_output(args, stderr=subprocess.STDOUT).decode( errors="ignore" @@ -71,12 +72,12 @@ def try_run(args, raise_error=True): # This class represents the appearance of a message prefix in a file. 
class MessagePrefix: - def __init__(self, label): + def __init__(self, label: str) -> None: self.has_message = False - self.prefixes = [] + self.prefixes: List[str] = [] self.label = label - def check(self, file_check_suffix, input_text): + def check(self, file_check_suffix: str, input_text: str) -> bool: self.prefix = self.label + file_check_suffix self.has_message = self.prefix in input_text if self.has_message: @@ -85,7 +86,7 @@ def check(self, file_check_suffix, input_text): class CheckRunner: - def __init__(self, args, extra_args): + def __init__(self, args: argparse.Namespace, extra_args: List[str]) -> None: self.resource_dir = args.resource_dir self.assume_file_name = args.assume_filename self.input_file_name = args.input_file_name @@ -143,11 +144,11 @@ def __init__(self, args, extra_args): if self.resource_dir is not None: self.clang_extra_args.append("-resource-dir=%s" % self.resource_dir) - def read_input(self): + def read_input(self) -> None: with open(self.input_file_name, "r", encoding="utf-8") as input_file: self.input_text = input_file.read() - def get_prefixes(self): + def get_prefixes(self) -> None: for suffix in self.check_suffix: if suffix and not re.match("^[A-Z0-9\\-]+$", suffix): sys.exit( @@ -189,7 +190,7 @@ def get_prefixes(self): ) assert expect_diagnosis or self.expect_no_diagnosis - def prepare_test_inputs(self): + def prepare_test_inputs(self) -> None: # Remove the contents of the CHECK lines to avoid CHECKs matching on # themselves. 
We need to keep the comments to preserve line numbers while # avoiding empty lines which could potentially trigger formatting-related @@ -198,7 +199,7 @@ def prepare_test_inputs(self): write_file(self.temp_file_name, cleaned_test) write_file(self.original_file_name, cleaned_test) - def run_clang_tidy(self): + def run_clang_tidy(self) -> str: args = ( [ "clang-tidy", @@ -238,11 +239,11 @@ def run_clang_tidy(self): print("------------------------------------------------------------------") return clang_tidy_output - def check_no_diagnosis(self, clang_tidy_output): + def check_no_diagnosis(self, clang_tidy_output: str) -> None: if clang_tidy_output != "": sys.exit("No diagnostics were expected, but found the ones above") - def check_fixes(self): + def check_fixes(self) -> None: if self.has_check_fixes: try_run( [ @@ -254,7 +255,7 @@ def check_fixes(self): ] ) - def check_messages(self, clang_tidy_output): + def check_messages(self, clang_tidy_output: str) -> None: if self.has_check_messages: messages_file = self.temp_file_name + ".msg" write_file(messages_file, clang_tidy_output) @@ -268,7 +269,7 @@ def check_messages(self, clang_tidy_output): ] ) - def check_notes(self, clang_tidy_output): + def check_notes(self, clang_tidy_output: str) -> None: if self.has_check_notes: notes_file = self.temp_file_name + ".notes" filtered_output = [ @@ -287,7 +288,7 @@ def check_notes(self, clang_tidy_output): ] ) - def run(self): + def run(self) -> None: self.read_input() if self.export_fixes is None: self.get_prefixes() @@ -313,7 +314,7 @@ def run(self): C_STANDARDS = ["c99", ("c11", "c1x"), "c17", ("c23", "c2x"), "c2y"] -def expand_std(std): +def expand_std(std: str) -> List[str]: split_std, or_later, _ = std.partition("-or-later") if not or_later: @@ -335,11 +336,11 @@ def expand_std(std): return [std] -def csv(string): +def csv(string: str) -> List[str]: return string.split(",") -def parse_arguments(): +def parse_arguments() -> Tuple[argparse.Namespace, List[str]]: parser = 
argparse.ArgumentParser( prog=pathlib.Path(__file__).stem, description=__doc__, @@ -374,7 +375,7 @@ def parse_arguments(): return parser.parse_known_args() -def main(): +def main() -> None: args, extra_args = parse_arguments() abbreviated_stds = args.std diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 9ecac68ae72bf..3f8a5f49313b2 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -3976,6 +3976,47 @@ the configuration (without a prefix: ``Auto``). +.. _EnumTrailingComma: + +**EnumTrailingComma** (``EnumTrailingCommaStyle``) :versionbadge:`clang-format 21` :ref:`¶ ` + Insert a comma (if missing) or remove the comma at the end of an ``enum`` + enumerator list. + + .. warning:: + + Setting this option to any value other than ``Leave`` could lead to + incorrect code formatting due to clang-format's lack of complete semantic + information. As such, extra care should be taken to review code changes + made by this option. + + Possible values: + + * ``ETC_Leave`` (in configuration: ``Leave``) + Don't insert or remove trailing commas. + + .. code-block:: c++ + + enum { a, b, c, }; + enum Color { red, green, blue }; + + * ``ETC_Insert`` (in configuration: ``Insert``) + Insert trailing commas. + + .. code-block:: c++ + + enum { a, b, c, }; + enum Color { red, green, blue, }; + + * ``ETC_Remove`` (in configuration: ``Remove``) + Remove trailing commas. + + .. code-block:: c++ + + enum { a, b, c }; + enum Color { red, green, blue }; + + + .. 
_ExperimentalAutoDetectBinPacking: **ExperimentalAutoDetectBinPacking** (``Boolean``) :versionbadge:`clang-format 3.7` :ref:`¶ ` diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e409f206f6eae..daad01919ecd4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -413,10 +413,13 @@ Hexagon Support X86 Support ^^^^^^^^^^^ -- Disable ``-m[no-]avx10.1`` and switch ``-m[no-]avx10.2`` to alias of 512 bit - options. -- Change ``-mno-avx10.1-512`` to alias of ``-mno-avx10.1-256`` to disable both - 256 and 512 bit instructions. +- The 256-bit maximum vector register size control was removed from + `AVX10 whitepaper _`. + * Re-target ``m[no-]avx10.1`` to enable AVX10.1 with 512-bit maximum vector register size. + * Emit warning for ``mavx10.x-256``, noting AVX10/256 is not supported. + * Emit warning for ``mavx10.x-512``, noting to use ``m[no-]avx10.x`` instead. + * Emit warning for ``m[no-]evex512``, noting AVX10/256 is not supported. + * The features avx10.x-256/512 keep unchanged and will be removed in the next release. Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ @@ -434,6 +437,7 @@ Windows Support - Clang now can process the `i128` and `ui128` integeral suffixes when MSVC extensions are enabled. This allows for properly processing ``intsafe.h`` in the Windows SDK. +- Clang now supports MSVC vector deleting destructors (GH19772). LoongArch Support ^^^^^^^^^^^^^^^^^ @@ -492,6 +496,8 @@ clang-format - Allow specifying the language (C, C++, or Objective-C) for a ``.h`` file by adding a special comment (e.g. ``// clang-format Language: ObjC``) near the top of the file. +- Add ``EnumTrailingComma`` option for inserting/removing commas at the end of + ``enum`` enumerator lists. 
libclang -------- diff --git a/clang/include/clang/AST/VTableBuilder.h b/clang/include/clang/AST/VTableBuilder.h index a5de41dbc22f1..e1efe8cddcc5e 100644 --- a/clang/include/clang/AST/VTableBuilder.h +++ b/clang/include/clang/AST/VTableBuilder.h @@ -150,7 +150,7 @@ class VTableComponent { bool isRTTIKind() const { return isRTTIKind(getKind()); } - GlobalDecl getGlobalDecl() const { + GlobalDecl getGlobalDecl(bool HasVectorDeletingDtors) const { assert(isUsedFunctionPointerKind() && "GlobalDecl can be created only from virtual function"); @@ -161,7 +161,9 @@ class VTableComponent { case CK_CompleteDtorPointer: return GlobalDecl(DtorDecl, CXXDtorType::Dtor_Complete); case CK_DeletingDtorPointer: - return GlobalDecl(DtorDecl, CXXDtorType::Dtor_Deleting); + return GlobalDecl(DtorDecl, (HasVectorDeletingDtors) + ? CXXDtorType::Dtor_VectorDeleting + : CXXDtorType::Dtor_Deleting); case CK_VCallOffset: case CK_VBaseOffset: case CK_OffsetToTop: diff --git a/clang/include/clang/Basic/ABI.h b/clang/include/clang/Basic/ABI.h index 231bad799a42c..48969e4f295c3 100644 --- a/clang/include/clang/Basic/ABI.h +++ b/clang/include/clang/Basic/ABI.h @@ -31,10 +31,11 @@ enum CXXCtorType { /// C++ destructor types. 
enum CXXDtorType { - Dtor_Deleting, ///< Deleting dtor - Dtor_Complete, ///< Complete object dtor - Dtor_Base, ///< Base object dtor - Dtor_Comdat ///< The COMDAT used for dtors + Dtor_Deleting, ///< Deleting dtor + Dtor_Complete, ///< Complete object dtor + Dtor_Base, ///< Base object dtor + Dtor_Comdat, ///< The COMDAT used for dtors + Dtor_VectorDeleting ///< Vector deleting dtor }; } // end namespace clang diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index b9f08d96151c9..e6e9ebbc2c304 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -377,13 +377,12 @@ def CXX11WarnSuggestOverride : DiagGroup<"suggest-override">; def WarnUnnecessaryVirtualSpecifier : DiagGroup<"unnecessary-virtual-specifier"> { code Documentation = [{ Warns when a ``final`` class contains a virtual method (including virtual -destructors). Since ``final`` classes cannot be subclassed, their methods -cannot be overridden, and hence the ``virtual`` specifier is useless. +destructors) that does not override anything. Since ``final`` classes cannot be +subclassed, their methods cannot be overridden, so there is no point to +introducing new ``virtual`` methods. The warning also detects virtual methods in classes whose destructor is ``final``, for the same reason. - -The warning does not fire on virtual methods which are also marked ``override``. 
}]; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 1e900437d41ce..5e45482584946 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2733,7 +2733,7 @@ def note_final_dtor_non_final_class_silence : Note< "mark %0 as '%select{final|sealed}1' to silence this warning">; def warn_unnecessary_virtual_specifier : Warning< "virtual method %0 is inside a 'final' class and can never be overridden">, - InGroup, DefaultIgnore; + InGroup; // C++11 attributes def err_repeat_attribute : Error<"%0 attribute cannot be repeated">; @@ -7031,10 +7031,10 @@ def err_offsetof_incomplete_type : Error< def err_offsetof_record_type : Error< "offsetof requires struct, union, or class type, %0 invalid">; def err_offsetof_array_type : Error<"offsetof requires array type, %0 invalid">; -def ext_offsetof_non_pod_type : ExtWarn<"offset of on non-POD type %0">, +def ext_offsetof_non_pod_type : ExtWarn<"'offsetof' on non-POD type %0">, InGroup; def ext_offsetof_non_standardlayout_type : ExtWarn< - "offset of on non-standard-layout type %0">, InGroup; + "'offsetof' on non-standard-layout type %0">, InGroup; def err_offsetof_bitfield : Error<"cannot compute offset of bit-field %0">; def err_offsetof_field_of_virtual_base : Error< "invalid application of 'offsetof' to a field of a virtual base">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3af072242d039..89cb03cc33b98 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6409,11 +6409,11 @@ def mavx10_1_256 : Flag<["-"], "mavx10.1-256">, Group, Group; def mavx10_1_512 : Flag<["-"], "mavx10.1-512">, Group; def mno_avx10_1_512 : Flag<["-"], "mno-avx10.1-512">, Alias; -def mavx10_1 : Flag<["-"], "mavx10.1">, Flags<[Unsupported]>; -def mno_avx10_1 : Flag<["-"], "mno-avx10.1">, Flags<[Unsupported]>; +def mavx10_1 : 
Flag<["-"], "mavx10.1">, Group; +def mno_avx10_1 : Flag<["-"], "mno-avx10.1">, Group; def mavx10_2_256 : Flag<["-"], "mavx10.2-256">, Group; def mavx10_2_512 : Flag<["-"], "mavx10.2-512">, Group; -def mavx10_2 : Flag<["-"], "mavx10.2">, Alias; +def mavx10_2 : Flag<["-"], "mavx10.2">, Group; def mno_avx10_2 : Flag<["-"], "mno-avx10.2">, Group; def mavx2 : Flag<["-"], "mavx2">, Group; def mno_avx2 : Flag<["-"], "mno-avx2">, Group; diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index fec47a248abb4..cea5e257659d6 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2704,6 +2704,39 @@ struct FormatStyle { /// \version 12 EmptyLineBeforeAccessModifierStyle EmptyLineBeforeAccessModifier; + /// Styles for ``enum`` trailing commas. + enum EnumTrailingCommaStyle : int8_t { + /// Don't insert or remove trailing commas. + /// \code + /// enum { a, b, c, }; + /// enum Color { red, green, blue }; + /// \endcode + ETC_Leave, + /// Insert trailing commas. + /// \code + /// enum { a, b, c, }; + /// enum Color { red, green, blue, }; + /// \endcode + ETC_Insert, + /// Remove trailing commas. + /// \code + /// enum { a, b, c }; + /// enum Color { red, green, blue }; + /// \endcode + ETC_Remove, + }; + + /// Insert a comma (if missing) or remove the comma at the end of an ``enum`` + /// enumerator list. + /// \warning + /// Setting this option to any value other than ``Leave`` could lead to + /// incorrect code formatting due to clang-format's lack of complete semantic + /// information. As such, extra care should be taken to review code changes + /// made by this option. + /// \endwarning + /// \version 21 + EnumTrailingCommaStyle EnumTrailingComma; + /// If ``true``, clang-format detects whether function calls and /// definitions are formatted with one parameter per line. 
/// @@ -5323,6 +5356,7 @@ struct FormatStyle { DisableFormat == R.DisableFormat && EmptyLineAfterAccessModifier == R.EmptyLineAfterAccessModifier && EmptyLineBeforeAccessModifier == R.EmptyLineBeforeAccessModifier && + EnumTrailingComma == R.EnumTrailingComma && ExperimentalAutoDetectBinPacking == R.ExperimentalAutoDetectBinPacking && FixNamespaceComments == R.FixNamespaceComments && diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 066bce61c74c1..c74e709ce06d2 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -14660,7 +14660,8 @@ class Sema final : public SemaBase { bool First = true); const NormalizedConstraint *getNormalizedAssociatedConstraints( - NamedDecl *ConstrainedDecl, ArrayRef AssociatedConstraints); + const NamedDecl *ConstrainedDecl, + ArrayRef AssociatedConstraints); /// \brief Check whether the given declaration's associated constraints are /// at least as constrained than another declaration's according to the @@ -14670,28 +14671,30 @@ class Sema final : public SemaBase { /// at least constrained than D2, and false otherwise. /// /// \returns true if an error occurred, false otherwise. - bool IsAtLeastAsConstrained(NamedDecl *D1, MutableArrayRef AC1, - NamedDecl *D2, MutableArrayRef AC2, - bool &Result); + bool IsAtLeastAsConstrained(const NamedDecl *D1, + MutableArrayRef AC1, + const NamedDecl *D2, + MutableArrayRef AC2, bool &Result); /// If D1 was not at least as constrained as D2, but would've been if a pair /// of atomic constraints involved had been declared in a concept and not /// repeated in two separate places in code. /// \returns true if such a diagnostic was emitted, false otherwise. 
bool MaybeEmitAmbiguousAtomicConstraintsDiagnostic( - NamedDecl *D1, ArrayRef AC1, NamedDecl *D2, + const NamedDecl *D1, ArrayRef AC1, const NamedDecl *D2, ArrayRef AC2); private: /// Caches pairs of template-like decls whose associated constraints were /// checked for subsumption and whether or not the first's constraints did in /// fact subsume the second's. - llvm::DenseMap, bool> SubsumptionCache; + llvm::DenseMap, bool> + SubsumptionCache; /// Caches the normalized associated constraints of declarations (concepts or /// constrained declarations). If an error occurred while normalizing the /// associated constraints of the template or concept, nullptr will be cached /// here. - llvm::DenseMap NormalizationCache; + llvm::DenseMap NormalizationCache; llvm::ContextualFoldingSet SatisfactionCache; diff --git a/clang/include/clang/Sema/SemaConcept.h b/clang/include/clang/Sema/SemaConcept.h index fda22b779c636..cbb3720c30ee2 100644 --- a/clang/include/clang/Sema/SemaConcept.h +++ b/clang/include/clang/Sema/SemaConcept.h @@ -31,10 +31,10 @@ enum { ConstraintAlignment = 8 }; struct alignas(ConstraintAlignment) AtomicConstraint { const Expr *ConstraintExpr; - NamedDecl *ConstraintDecl; + const NamedDecl *ConstraintDecl; std::optional> ParameterMapping; - AtomicConstraint(const Expr *ConstraintExpr, NamedDecl *ConstraintDecl) + AtomicConstraint(const Expr *ConstraintExpr, const NamedDecl *ConstraintDecl) : ConstraintExpr(ConstraintExpr), ConstraintDecl(ConstraintDecl) {}; bool hasMatchingParameterMapping(ASTContext &C, @@ -114,9 +114,9 @@ struct NormalizedConstraint { private: static std::optional - fromConstraintExprs(Sema &S, NamedDecl *D, ArrayRef E); + fromConstraintExprs(Sema &S, const NamedDecl *D, ArrayRef E); static std::optional - fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E); + fromConstraintExpr(Sema &S, const NamedDecl *D, const Expr *E); }; struct alignas(ConstraintAlignment) NormalizedConstraintPair { @@ -137,7 +137,7 @@ struct 
alignas(ConstraintAlignment) FoldExpandedConstraint { }; const NormalizedConstraint *getNormalizedAssociatedConstraints( - Sema &S, NamedDecl *ConstrainedDecl, + Sema &S, const NamedDecl *ConstrainedDecl, ArrayRef AssociatedConstraints); /// \brief SubsumptionChecker establishes subsumption @@ -149,8 +149,8 @@ class SubsumptionChecker { SubsumptionChecker(Sema &SemaRef, SubsumptionCallable Callable = {}); - std::optional Subsumes(NamedDecl *DP, ArrayRef P, - NamedDecl *DQ, ArrayRef Q); + std::optional Subsumes(const NamedDecl *DP, ArrayRef P, + const NamedDecl *DQ, ArrayRef Q); bool Subsumes(const NormalizedConstraint *P, const NormalizedConstraint *Q); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index c9d1bea4c623a..2d9480ebcf00c 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -14135,7 +14135,6 @@ static QualType getCommonSugarTypeNode(ASTContext &Ctx, const Type *X, CANONICAL_TYPE(IncompleteArray) CANONICAL_TYPE(HLSLAttributedResource) CANONICAL_TYPE(LValueReference) - CANONICAL_TYPE(MemberPointer) CANONICAL_TYPE(ObjCInterface) CANONICAL_TYPE(ObjCObject) CANONICAL_TYPE(ObjCObjectPointer) @@ -14313,6 +14312,15 @@ static QualType getCommonSugarTypeNode(ASTContext &Ctx, const Type *X, return QualType(); return Ctx.getUsingType(CD, Ctx.getQualifiedType(Underlying)); } + case Type::MemberPointer: { + const auto *PX = cast(X), + *PY = cast(Y); + CXXRecordDecl *Cls = PX->getMostRecentCXXRecordDecl(); + assert(Cls == PY->getMostRecentCXXRecordDecl()); + return Ctx.getMemberPointerType( + ::getCommonPointeeType(Ctx, PX, PY), + ::getCommonQualifier(Ctx, PX, PY, /*IsSame=*/false), Cls); + } case Type::CountAttributed: { const auto *DX = cast(X), *DY = cast(Y); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 981cdb3c806b1..49a04861ae25d 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -6004,6 +6004,8 @@ void 
CXXNameMangler::mangleCXXDtorType(CXXDtorType T) { case Dtor_Comdat: Out << "D5"; break; + case Dtor_VectorDeleting: + llvm_unreachable("Itanium ABI does not use vector deleting dtors"); } } diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 15de407e122d8..7e964124a9fec 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1484,8 +1484,9 @@ void MicrosoftCXXNameMangler::mangleCXXDtorType(CXXDtorType T) { // ::= ?_G # scalar deleting destructor case Dtor_Deleting: Out << "?_G"; return; // ::= ?_E # vector deleting destructor - // FIXME: Add a vector deleting dtor type. It goes in the vtable, so we need - // it. + case Dtor_VectorDeleting: + Out << "?_E"; + return; case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); } @@ -2886,9 +2887,12 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // ::= @ # structors (they have no declared return type) if (IsStructor) { if (isa(D) && isStructorDecl(D)) { - // The scalar deleting destructor takes an extra int argument which is not - // reflected in the AST. - if (StructorType == Dtor_Deleting) { + // The deleting destructors take an extra argument of type int that + // indicates whether the storage for the object should be deleted and + // whether a single object or an array of objects is being destroyed. This + // extra argument is not reflected in the AST. + if (StructorType == Dtor_Deleting || + StructorType == Dtor_VectorDeleting) { Out << (PointersAre64Bit ? "PEAXI@Z" : "PAXI@Z"); return; } @@ -3861,10 +3865,10 @@ void MicrosoftMangleContextImpl::mangleCXXDtorThunk(const CXXDestructorDecl *DD, const ThunkInfo &Thunk, bool /*ElideOverrideInfo*/, raw_ostream &Out) { - // FIXME: Actually, the dtor thunk should be emitted for vector deleting - // dtors rather than scalar deleting dtors. Just use the vector deleting dtor - // mangling manually until we support both deleting dtor types. 
- assert(Type == Dtor_Deleting); + // The dtor thunk should use vector deleting dtor mangling, however as an + // optimization we may end up emitting only scalar deleting dtor body, so just + // use the vector deleting dtor mangling manually. + assert(Type == Dtor_Deleting || Type == Dtor_VectorDeleting); msvc_hashing_ostream MHO(Out); MicrosoftCXXNameMangler Mangler(*this, MHO, DD, Type); Mangler.getStream() << "??_E"; diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp index 18893b996b5d6..21f9d343c6ee7 100644 --- a/clang/lib/AST/VTableBuilder.cpp +++ b/clang/lib/AST/VTableBuilder.cpp @@ -1735,8 +1735,8 @@ void ItaniumVTableBuilder::LayoutPrimaryAndSecondaryVTables( const CXXMethodDecl *MD = I.first; const MethodInfo &MI = I.second; if (const CXXDestructorDecl *DD = dyn_cast(MD)) { - MethodVTableIndices[GlobalDecl(DD, Dtor_Complete)] - = MI.VTableIndex - AddressPoint; + MethodVTableIndices[GlobalDecl(DD, Dtor_Complete)] = + MI.VTableIndex - AddressPoint; MethodVTableIndices[GlobalDecl(DD, Dtor_Deleting)] = MI.VTableIndex + 1 - AddressPoint; } else { @@ -2657,7 +2657,11 @@ class VFTableBuilder { MethodVFTableLocation Loc(MI.VBTableIndex, WhichVFPtr.getVBaseWithVPtr(), WhichVFPtr.NonVirtualOffset, MI.VFTableIndex); if (const CXXDestructorDecl *DD = dyn_cast(MD)) { - MethodVFTableLocations[GlobalDecl(DD, Dtor_Deleting)] = Loc; + // In Microsoft ABI vftable always references vector deleting dtor. + CXXDtorType DtorTy = Context.getTargetInfo().getCXXABI().isMicrosoft() + ? 
Dtor_VectorDeleting + : Dtor_Deleting; + MethodVFTableLocations[GlobalDecl(DD, DtorTy)] = Loc; } else { MethodVFTableLocations[MD] = Loc; } @@ -3287,7 +3291,10 @@ void VFTableBuilder::dumpLayout(raw_ostream &Out) { const CXXDestructorDecl *DD = Component.getDestructorDecl(); DD->printQualifiedName(Out); - Out << "() [scalar deleting]"; + if (Context.getTargetInfo().getCXXABI().isMicrosoft()) + Out << "() [vector deleting]"; + else + Out << "() [scalar deleting]"; if (DD->isPureVirtual()) Out << " [pure]"; @@ -3736,8 +3743,7 @@ void MicrosoftVTableContext::computeVTableRelatedInformation( } } - MethodVFTableLocations.insert(NewMethodLocations.begin(), - NewMethodLocations.end()); + MethodVFTableLocations.insert_range(NewMethodLocations); if (Context.getLangOpts().DumpVTableLayouts) dumpMethodLocations(RD, NewMethodLocations, llvm::outs()); } @@ -3758,7 +3764,7 @@ void MicrosoftVTableContext::dumpMethodLocations( PredefinedIdentKind::PrettyFunctionNoVirtual, MD); if (isa(MD)) { - IndicesMap[I.second] = MethodName + " [scalar deleting]"; + IndicesMap[I.second] = MethodName + " [vector deleting]"; } else { IndicesMap[I.second] = MethodName; } @@ -3824,8 +3830,7 @@ const VirtualBaseInfo &MicrosoftVTableContext::computeVBTableRelatedInformation( // virtual bases come first so that the layout is the same. const VirtualBaseInfo &BaseInfo = computeVBTableRelatedInformation(VBPtrBase); - VBI->VBTableIndices.insert(BaseInfo.VBTableIndices.begin(), - BaseInfo.VBTableIndices.end()); + VBI->VBTableIndices.insert_range(BaseInfo.VBTableIndices); } // New vbases are added to the end of the vbtable. 
@@ -3875,7 +3880,7 @@ MicrosoftVTableContext::getMethodVFTableLocation(GlobalDecl GD) { assert(hasVtableSlot(cast(GD.getDecl())) && "Only use this method for virtual methods or dtors"); if (isa(GD.getDecl())) - assert(GD.getDtorType() == Dtor_Deleting); + assert(GD.getDtorType() == Dtor_VectorDeleting); GD = GD.getCanonicalDecl(); diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp index 78a7b021855b7..6f47e24eed5b3 100644 --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -175,7 +175,6 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { // requires explicit comdat support in the IL. if (llvm::GlobalValue::isWeakForLinker(TargetLinkage)) return true; - // Create the alias with no name. auto *Alias = llvm::GlobalAlias::create(AliasValueType, 0, Linkage, "", Aliasee, &getModule()); @@ -201,6 +200,42 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) { return false; } +/// Emit a definition as a global alias for another definition, unconditionally. +void CodeGenModule::EmitDefinitionAsAlias(GlobalDecl AliasDecl, + GlobalDecl TargetDecl) { + + llvm::Type *AliasValueType = getTypes().GetFunctionType(AliasDecl); + + StringRef MangledName = getMangledName(AliasDecl); + llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + if (Entry && !Entry->isDeclaration()) + return; + auto *Aliasee = cast(GetAddrOfGlobal(TargetDecl)); + + // Determine the linkage type for the alias. + llvm::GlobalValue::LinkageTypes Linkage = getFunctionLinkage(AliasDecl); + + // Create the alias with no name. + auto *Alias = llvm::GlobalAlias::create(AliasValueType, 0, Linkage, "", + Aliasee, &getModule()); + // Destructors are always unnamed_addr. 
+ Alias->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + if (Entry) { + assert(Entry->getValueType() == AliasValueType && + Entry->getAddressSpace() == Alias->getAddressSpace() && + "declaration exists with different type"); + Alias->takeName(Entry); + Entry->replaceAllUsesWith(Alias); + Entry->eraseFromParent(); + } else { + Alias->setName(MangledName); + } + + // Set any additional necessary attributes for the alias. + SetCommonAttributes(AliasDecl, Alias); +} + llvm::Function *CodeGenModule::codegenCXXStructor(GlobalDecl GD) { const CGFunctionInfo &FnInfo = getTypes().arrangeCXXStructorDeclaration(GD); auto *Fn = cast( diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp index fd35f2adfa2d2..9f77fbec21380 100644 --- a/clang/lib/CodeGen/CGCXXABI.cpp +++ b/clang/lib/CodeGen/CGCXXABI.cpp @@ -272,6 +272,20 @@ void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr, numElements = readArrayCookieImpl(CGF, allocAddr, cookieSize); } +void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, Address ptr, + QualType eltTy, llvm::Value *&numElements, + llvm::Value *&allocPtr, CharUnits &cookieSize) { + assert(eltTy.isDestructedType()); + + // Derive a char* in the same address space as the pointer. 
+ ptr = ptr.withElementType(CGF.Int8Ty); + + cookieSize = getArrayCookieSizeImpl(eltTy); + Address allocAddr = CGF.Builder.CreateConstInBoundsByteGEP(ptr, -cookieSize); + allocPtr = allocAddr.emitRawPointer(CGF); + numElements = readArrayCookieImpl(CGF, allocAddr, cookieSize); +} + llvm::Value *CGCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, Address ptr, CharUnits cookieSize) { diff --git a/clang/lib/CodeGen/CGCXXABI.h b/clang/lib/CodeGen/CGCXXABI.h index 687ff7fb84444..148a7ba6df7e6 100644 --- a/clang/lib/CodeGen/CGCXXABI.h +++ b/clang/lib/CodeGen/CGCXXABI.h @@ -275,6 +275,7 @@ class CGCXXABI { virtual CatchTypeInfo getCatchAllTypeInfo(); virtual bool shouldTypeidBeNullChecked(QualType SrcRecordTy) = 0; + virtual bool hasVectorDeletingDtors() = 0; virtual void EmitBadTypeidCall(CodeGenFunction &CGF) = 0; virtual llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, @@ -575,6 +576,12 @@ class CGCXXABI { QualType ElementType, llvm::Value *&NumElements, llvm::Value *&AllocPtr, CharUnits &CookieSize); + /// Reads the array cookie associated with the given pointer, + /// that should have one. + void ReadArrayCookie(CodeGenFunction &CGF, Address Ptr, QualType ElementType, + llvm::Value *&NumElements, llvm::Value *&AllocPtr, + CharUnits &CookieSize); + /// Return whether the given global decl needs a VTT parameter. 
virtual bool NeedsVTTParameter(GlobalDecl GD); diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 98c93b5bb4883..f508930cc9f2b 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -1432,6 +1432,70 @@ static bool CanSkipVTablePointerInitialization(CodeGenFunction &CGF, return true; } +static void EmitConditionalArrayDtorCall(const CXXDestructorDecl *DD, + CodeGenFunction &CGF, + llvm::Value *ShouldDeleteCondition) { + Address ThisPtr = CGF.LoadCXXThisAddress(); + llvm::BasicBlock *ScalarBB = CGF.createBasicBlock("dtor.scalar"); + llvm::BasicBlock *callDeleteBB = + CGF.createBasicBlock("dtor.call_delete_after_array_destroy"); + llvm::BasicBlock *VectorBB = CGF.createBasicBlock("dtor.vector"); + auto *CondTy = cast(ShouldDeleteCondition->getType()); + llvm::Value *CheckTheBitForArrayDestroy = CGF.Builder.CreateAnd( + ShouldDeleteCondition, llvm::ConstantInt::get(CondTy, 2)); + llvm::Value *ShouldDestroyArray = + CGF.Builder.CreateIsNull(CheckTheBitForArrayDestroy); + CGF.Builder.CreateCondBr(ShouldDestroyArray, ScalarBB, VectorBB); + + CGF.EmitBlock(VectorBB); + + llvm::Value *numElements = nullptr; + llvm::Value *allocatedPtr = nullptr; + CharUnits cookieSize; + QualType EltTy = DD->getThisType()->getPointeeType(); + CGF.CGM.getCXXABI().ReadArrayCookie(CGF, ThisPtr, EltTy, numElements, + allocatedPtr, cookieSize); + + // Destroy the elements. 
+ QualType::DestructionKind dtorKind = EltTy.isDestructedType(); + + assert(dtorKind); + assert(numElements && "no element count for a type with a destructor!"); + + CharUnits elementSize = CGF.getContext().getTypeSizeInChars(EltTy); + CharUnits elementAlign = + ThisPtr.getAlignment().alignmentOfArrayElement(elementSize); + + llvm::Value *arrayBegin = ThisPtr.emitRawPointer(CGF); + llvm::Value *arrayEnd = CGF.Builder.CreateInBoundsGEP( + ThisPtr.getElementType(), arrayBegin, numElements, "delete.end"); + + // We already checked that the array is not 0-length before entering vector + // deleting dtor. + CGF.emitArrayDestroy(arrayBegin, arrayEnd, EltTy, elementAlign, + CGF.getDestroyer(dtorKind), + /*checkZeroLength*/ false, CGF.needsEHCleanup(dtorKind)); + + llvm::BasicBlock *VectorBBCont = CGF.createBasicBlock("dtor.vector.cont"); + CGF.EmitBlock(VectorBBCont); + + llvm::Value *CheckTheBitForDeleteCall = CGF.Builder.CreateAnd( + ShouldDeleteCondition, llvm::ConstantInt::get(CondTy, 1)); + + llvm::Value *ShouldCallDelete = + CGF.Builder.CreateIsNull(CheckTheBitForDeleteCall); + CGF.Builder.CreateCondBr(ShouldCallDelete, CGF.ReturnBlock.getBlock(), + callDeleteBB); + CGF.EmitBlock(callDeleteBB); + const CXXDestructorDecl *Dtor = cast(CGF.CurCodeDecl); + const CXXRecordDecl *ClassDecl = Dtor->getParent(); + CGF.EmitDeleteCall(Dtor->getOperatorDelete(), allocatedPtr, + CGF.getContext().getTagDeclType(ClassDecl)); + + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(ScalarBB); +} + /// EmitDestructorBody - Emits the body of the current destructor. void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { const CXXDestructorDecl *Dtor = cast(CurGD.getDecl()); @@ -1461,7 +1525,9 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // outside of the function-try-block, which means it's always // possible to delegate the destructor body to the complete // destructor. Do so. 
- if (DtorType == Dtor_Deleting) { + if (DtorType == Dtor_Deleting || DtorType == Dtor_VectorDeleting) { + if (CXXStructorImplicitParamValue && DtorType == Dtor_VectorDeleting) + EmitConditionalArrayDtorCall(Dtor, *this, CXXStructorImplicitParamValue); RunCleanupsScope DtorEpilogue(*this); EnterDtorCleanups(Dtor, Dtor_Deleting); if (HaveInsertPoint()) { @@ -1490,6 +1556,8 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { switch (DtorType) { case Dtor_Comdat: llvm_unreachable("not expecting a COMDAT"); case Dtor_Deleting: llvm_unreachable("already handled deleting case"); + case Dtor_VectorDeleting: + llvm_unreachable("already handled vector deleting case"); case Dtor_Complete: assert((Body || getTarget().getCXXABI().isMicrosoft()) && @@ -1572,7 +1640,6 @@ namespace { return CGF.EmitScalarExpr(ThisArg); return CGF.LoadCXXThis(); } - /// Call the operator delete associated with the current destructor. struct CallDtorDelete final : EHScopeStack::Cleanup { CallDtorDelete() {} @@ -1591,8 +1658,10 @@ namespace { bool ReturnAfterDelete) { llvm::BasicBlock *callDeleteBB = CGF.createBasicBlock("dtor.call_delete"); llvm::BasicBlock *continueBB = CGF.createBasicBlock("dtor.continue"); - llvm::Value *ShouldCallDelete - = CGF.Builder.CreateIsNull(ShouldDeleteCondition); + auto *CondTy = cast(ShouldDeleteCondition->getType()); + llvm::Value *CheckTheBit = CGF.Builder.CreateAnd( + ShouldDeleteCondition, llvm::ConstantInt::get(CondTy, 1)); + llvm::Value *ShouldCallDelete = CGF.Builder.CreateIsNull(CheckTheBit); CGF.Builder.CreateCondBr(ShouldCallDelete, continueBB, callDeleteBB); CGF.EmitBlock(callDeleteBB); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index ba0dec99d6ae8..52aa956121d73 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2119,7 +2119,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // Emit MS ABI vftable information. 
There is only one entry for the // deleting dtor. const auto *DD = dyn_cast(Method); - GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method); + GlobalDecl GD = + DD ? GlobalDecl(DD, Dtor_VectorDeleting) : GlobalDecl(Method); MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); VIndex = ML.Index; diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 5d96959065dd9..5c11c0bceade7 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1209,6 +1209,8 @@ void CodeGenFunction::EmitNewArrayInitializer( EmitCXXAggrConstructorCall(Ctor, NumElements, CurPtr, CCE, /*NewPointerIsChecked*/true, CCE->requiresZeroInitialization()); + if (CGM.getCXXABI().hasVectorDeletingDtors()) + CGM.requireVectorDestructorDefinition(Ctor->getParent()); return; } @@ -1912,10 +1914,8 @@ static void EmitDestroyingObjectDelete(CodeGenFunction &CGF, /// Emit the code for deleting a single object. /// \return \c true if we started emitting UnconditionalDeleteBlock, \c false /// if not. 
-static bool EmitObjectDelete(CodeGenFunction &CGF, - const CXXDeleteExpr *DE, - Address Ptr, - QualType ElementType, +static bool EmitObjectDelete(CodeGenFunction &CGF, const CXXDeleteExpr *DE, + Address Ptr, QualType ElementType, llvm::BasicBlock *UnconditionalDeleteBlock) { // C++11 [expr.delete]p3: // If the static type of the object to be deleted is different from its @@ -2131,6 +2131,40 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { assert(ConvertTypeForMem(DeleteTy) == Ptr.getElementType()); + if (E->isArrayForm() && CGM.getCXXABI().hasVectorDeletingDtors()) { + if (auto *RD = DeleteTy->getAsCXXRecordDecl()) { + auto *Dtor = RD->getDestructor(); + if (Dtor && Dtor->isVirtual()) { + llvm::Value *NumElements = nullptr; + llvm::Value *AllocatedPtr = nullptr; + CharUnits CookieSize; + llvm::BasicBlock *bodyBB = createBasicBlock("vdtor.call"); + llvm::BasicBlock *doneBB = createBasicBlock("vdtor.nocall"); + // Check array cookie to see if the array has 0 length. Don't call + // the destructor in that case. + CGM.getCXXABI().ReadArrayCookie(*this, Ptr, E, DeleteTy, NumElements, + AllocatedPtr, CookieSize); + + auto *CondTy = cast(NumElements->getType()); + llvm::Value *isEmpty = Builder.CreateICmpEQ( + NumElements, llvm::ConstantInt::get(CondTy, 0)); + Builder.CreateCondBr(isEmpty, doneBB, bodyBB); + + // Delete cookie for empty array. 
+ const FunctionDecl *operatorDelete = E->getOperatorDelete(); + EmitBlock(doneBB); + EmitDeleteCall(operatorDelete, AllocatedPtr, DeleteTy, NumElements, + CookieSize); + EmitBranch(DeleteEnd); + + EmitBlock(bodyBB); + if (!EmitObjectDelete(*this, E, Ptr, DeleteTy, DeleteEnd)) + EmitBlock(DeleteEnd); + return; + } + } + } + if (E->isArrayForm()) { EmitArrayDelete(*this, E, Ptr, DeleteTy); EmitBlock(DeleteEnd); diff --git a/clang/lib/CodeGen/CGVTables.cpp b/clang/lib/CodeGen/CGVTables.cpp index c7b36957b2e57..dcd1fa77fa834 100644 --- a/clang/lib/CodeGen/CGVTables.cpp +++ b/clang/lib/CodeGen/CGVTables.cpp @@ -769,7 +769,8 @@ void CodeGenVTables::addVTableComponent(ConstantArrayBuilder &builder, case VTableComponent::CK_FunctionPointer: case VTableComponent::CK_CompleteDtorPointer: case VTableComponent::CK_DeletingDtorPointer: { - GlobalDecl GD = component.getGlobalDecl(); + GlobalDecl GD = + component.getGlobalDecl(CGM.getCXXABI().hasVectorDeletingDtors()); const bool IsThunk = nextVTableThunkIndex < layout.vtable_thunks().size() && diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 5dbd50be6ca1a..8f9cf965af2b9 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -853,8 +853,7 @@ void CodeGenModule::Release() { if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); EmitDeferred(); - DeferredDecls.insert(EmittedDeferredDecls.begin(), - EmittedDeferredDecls.end()); + DeferredDecls.insert_range(EmittedDeferredDecls); EmittedDeferredDecls.clear(); EmitVTablesOpportunistically(); applyGlobalValReplacements(); @@ -7938,3 +7937,51 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx); } + +bool CodeGenModule::classNeedsVectorDestructor(const CXXRecordDecl *RD) { + CXXDestructorDecl *Dtor = RD->getDestructor(); + // The compiler can't know if new[]/delete[] will be used 
outside of the DLL, + // so just force vector deleting destructor emission if dllexport is present. + // This matches MSVC behavior. + if (Dtor && Dtor->isVirtual() && Dtor->isDefined() && + Dtor->hasAttr()) + return true; + + assert(getCXXABI().hasVectorDeletingDtors()); + return RequireVectorDeletingDtor.count(RD); +} + +void CodeGenModule::requireVectorDestructorDefinition(const CXXRecordDecl *RD) { + assert(getCXXABI().hasVectorDeletingDtors()); + RequireVectorDeletingDtor.insert(RD); + + // To reduce code size in general case we lazily emit scalar deleting + // destructor definition and an alias from vector deleting destructor to + // scalar deleting destructor. It may happen that we first emitted the scalar + // deleting destructor definition and the alias and then discovered that the + // definition of the vector deleting destructor is required. Then we need to + // remove the alias and the scalar deleting destructor and queue vector + // deleting destructor body for emission. Check if that is the case. + CXXDestructorDecl *DtorD = RD->getDestructor(); + GlobalDecl ScalarDtorGD(DtorD, Dtor_Deleting); + StringRef MangledName = getMangledName(ScalarDtorGD); + llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + if (Entry && !Entry->isDeclaration()) { + GlobalDecl VectorDtorGD(DtorD, Dtor_VectorDeleting); + StringRef VDName = getMangledName(VectorDtorGD); + llvm::GlobalValue *VDEntry = GetGlobalValue(VDName); + // It exists and it should be an alias. 
+ assert(VDEntry && isa(VDEntry)); + auto *NewFn = llvm::Function::Create( + cast(VDEntry->getValueType()), + llvm::Function::ExternalLinkage, VDName, &getModule()); + SetFunctionAttributes(VectorDtorGD, NewFn, /*IsIncompleteFunction*/ false, + /*IsThunk*/ false); + NewFn->takeName(VDEntry); + VDEntry->replaceAllUsesWith(NewFn); + VDEntry->eraseFromParent(); + Entry->replaceAllUsesWith(NewFn); + Entry->eraseFromParent(); + addDeferredDeclToEmit(VectorDtorGD); + } +} diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 46de3d868f901..2cf15e24180b3 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -528,6 +528,9 @@ class CodeGenModule : public CodeGenTypeCache { /// that we don't re-emit the initializer. llvm::DenseMap DelayedCXXInitPosition; + /// To remember which types did require a vector deleting dtor. + llvm::SmallPtrSet RequireVectorDeletingDtor; + typedef std::pair GlobalInitData; @@ -1542,6 +1545,7 @@ class CodeGenModule : public CodeGenTypeCache { void EmitGlobal(GlobalDecl D); bool TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D); + void EmitDefinitionAsAlias(GlobalDecl Alias, GlobalDecl Target); llvm::GlobalValue *GetGlobalValue(StringRef Ref); @@ -1809,6 +1813,8 @@ class CodeGenModule : public CodeGenTypeCache { // behavior. So projects like the Linux kernel can rely on it. 
return !getLangOpts().CPlusPlus; } + void requireVectorDestructorDefinition(const CXXRecordDecl *RD); + bool classNeedsVectorDestructor(const CXXRecordDecl *RD); private: bool shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const; diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 77e995b4c933a..38e3a63ebfb11 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -90,6 +90,8 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { case Dtor_Comdat: llvm_unreachable("emitting dtor comdat as function?"); + case Dtor_VectorDeleting: + llvm_unreachable("unexpected dtor kind for this ABI"); } llvm_unreachable("bad dtor kind"); } @@ -179,6 +181,7 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { } bool shouldTypeidBeNullChecked(QualType SrcRecordTy) override; + bool hasVectorDeletingDtors() override { return false; } void EmitBadTypeidCall(CodeGenFunction &CGF) override; llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, @@ -448,7 +451,8 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { if (!IsInlined) continue; - StringRef Name = CGM.getMangledName(VtableComponent.getGlobalDecl()); + StringRef Name = CGM.getMangledName( + VtableComponent.getGlobalDecl(/*HasVectorDeletingDtors=*/false)); auto *Entry = CGM.GetGlobalValue(Name); // This checks if virtual inline function has already been emitted. 
// Note that it is possible that this inline function would be emitted diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 40371d99e23e1..464d4370284fb 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -70,8 +70,8 @@ class MicrosoftCXXABI : public CGCXXABI { switch (GD.getDtorType()) { case Dtor_Complete: case Dtor_Deleting: + case Dtor_VectorDeleting: return true; - case Dtor_Base: return false; @@ -145,6 +145,7 @@ class MicrosoftCXXABI : public CGCXXABI { } bool shouldTypeidBeNullChecked(QualType SrcRecordTy) override; + bool hasVectorDeletingDtors() override { return true; } void EmitBadTypeidCall(CodeGenFunction &CGF) override; llvm::Value *EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, @@ -260,7 +261,7 @@ class MicrosoftCXXABI : public CGCXXABI { // There's only Dtor_Deleting in vftable but it shares the this // adjustment with the base one, so look up the deleting one instead. 
- LookupGD = GlobalDecl(DD, Dtor_Deleting); + LookupGD = GlobalDecl(DD, Dtor_VectorDeleting); } MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); @@ -342,8 +343,8 @@ class MicrosoftCXXABI : public CGCXXABI { void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF, GlobalDecl GD, CallArgList &CallArgs) override { - assert(GD.getDtorType() == Dtor_Deleting && - "Only deleting destructor thunks are available in this ABI"); + assert(GD.getDtorType() == Dtor_VectorDeleting && + "Only vector deleting destructor thunks are available in this ABI"); CallArgs.add(RValue::get(getStructorImplicitParamValue(CGF)), getContext().IntTy); } @@ -1090,7 +1091,8 @@ bool MicrosoftCXXABI::HasThisReturn(GlobalDecl GD) const { static bool isDeletingDtor(GlobalDecl GD) { return isa(GD.getDecl()) && - GD.getDtorType() == Dtor_Deleting; + (GD.getDtorType() == Dtor_Deleting || + GD.getDtorType() == Dtor_VectorDeleting); } bool MicrosoftCXXABI::hasMostDerivedReturn(GlobalDecl GD) const { @@ -1343,7 +1345,8 @@ MicrosoftCXXABI::buildStructorSignature(GlobalDecl GD, AddedStructorArgCounts Added; // TODO: 'for base' flag if (isa(GD.getDecl()) && - GD.getDtorType() == Dtor_Deleting) { + (GD.getDtorType() == Dtor_Deleting || + GD.getDtorType() == Dtor_VectorDeleting)) { // The scalar deleting destructor takes an implicit int parameter. ArgTys.push_back(getContext().IntTy); ++Added.Suffix; @@ -1375,7 +1378,7 @@ void MicrosoftCXXABI::setCXXDestructorDLLStorage(llvm::GlobalValue *GV, CXXDtorType DT) const { // Deleting destructor variants are never imported or exported. Give them the // default storage class. - if (DT == Dtor_Deleting) { + if (DT == Dtor_Deleting || DT == Dtor_VectorDeleting) { GV->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); } else { const NamedDecl *ND = Dtor; @@ -1409,6 +1412,12 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage( // and are emitted everywhere they are used. 
They are internal if the class // is internal. return llvm::GlobalValue::LinkOnceODRLinkage; + case Dtor_VectorDeleting: + // Use the weak, non-ODR linkage for vector deleting destructors to block + // inlining. This enables an MS ABI code-size saving optimization that + // allows us to avoid emitting array deletion code when arrays of a given + // type are not allocated within the final linkage unit. + return llvm::GlobalValue::WeakAnyLinkage; case Dtor_Comdat: llvm_unreachable("MS C++ ABI does not support comdat dtors"); } @@ -1440,7 +1449,7 @@ MicrosoftCXXABI::getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { // There's no Dtor_Base in vftable but it shares the this adjustment with // the deleting one, so look it up instead. - GD = GlobalDecl(DD, Dtor_Deleting); + GD = GlobalDecl(DD, Dtor_VectorDeleting); } MethodVFTableLocation ML = @@ -1489,7 +1498,7 @@ Address MicrosoftCXXABI::adjustThisArgumentForVirtualFunctionCall( // There's only Dtor_Deleting in vftable but it shares the this adjustment // with the base one, so look up the deleting one instead. - LookupGD = GlobalDecl(DD, Dtor_Deleting); + LookupGD = GlobalDecl(DD, Dtor_VectorDeleting); } MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(LookupGD); @@ -2002,20 +2011,20 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( auto *D = dyn_cast(E); assert((CE != nullptr) ^ (D != nullptr)); assert(CE == nullptr || CE->arg_begin() == CE->arg_end()); - assert(DtorType == Dtor_Deleting || DtorType == Dtor_Complete); + assert(DtorType == Dtor_VectorDeleting || DtorType == Dtor_Complete || + DtorType == Dtor_Deleting); // We have only one destructor in the vftable but can get both behaviors // by passing an implicit int parameter. 
- GlobalDecl GD(Dtor, Dtor_Deleting); + GlobalDecl GD(Dtor, Dtor_VectorDeleting); const CGFunctionInfo *FInfo = &CGM.getTypes().arrangeCXXStructorDeclaration(GD); llvm::FunctionType *Ty = CGF.CGM.getTypes().GetFunctionType(*FInfo); CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); ASTContext &Context = getContext(); - llvm::Value *ImplicitParam = llvm::ConstantInt::get( - llvm::IntegerType::getInt32Ty(CGF.getLLVMContext()), - DtorType == Dtor_Deleting); + uint32_t Flags = ((D && D->isArrayForm()) << 1) | (DtorType == Dtor_Deleting); + llvm::Value *ImplicitParam = CGF.Builder.getInt32(Flags); QualType ThisTy; if (CE) { @@ -4056,6 +4065,18 @@ void MicrosoftCXXABI::emitCXXStructor(GlobalDecl GD) { if (GD.getDtorType() == Dtor_Base && !CGM.TryEmitBaseDestructorAsAlias(dtor)) return; + if (GD.getDtorType() == Dtor_VectorDeleting && + !CGM.classNeedsVectorDestructor(dtor->getParent())) { + // Create GlobalDecl object with the correct type for the scalar + // deleting destructor. + GlobalDecl ScalarDtorGD(dtor, Dtor_Deleting); + + // Emit an alias from the vector deleting destructor to the scalar deleting + // destructor. 
+ CGM.EmitDefinitionAsAlias(GD, ScalarDtorGD); + return; + } + llvm::Function *Fn = CGM.codegenCXXStructor(GD); if (Fn->isWeakForLinker()) Fn->setComdat(CGM.getModule().getOrInsertComdat(Fn->getName())); diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 47c2c3e23f9fd..429b041c9c513 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -243,10 +243,18 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, assert((Version == "1" || Version == "2") && "Invalid AVX10 feature name."); if (Width == "") { - assert(IsNegative && "Only negative options can omit width."); - Features.push_back(Args.MakeArgString("-" + Name + "-256")); + if (IsNegative) + Features.push_back(Args.MakeArgString("-" + Name + "-256")); + else + Features.push_back(Args.MakeArgString("+" + Name + "-512")); } else { - assert((Width == "256" || Width == "512") && "Invalid vector length."); + if (Width == "512") + D.Diag(diag::warn_drv_deprecated_arg) << Name << 1 << Name.drop_back(4); + else if (Width == "256") + D.Diag(diag::warn_drv_deprecated_custom) + << Name << "because AVX10/256 is not supported and will be removed"; + else + assert((Width == "256" || Width == "512") && "Invalid vector length."); Features.push_back(Args.MakeArgString((IsNegative ? 
"-" : "+") + Name)); } } @@ -275,6 +283,11 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, D.Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << Triple.getTriple(); + if (A->getOption().matches(options::OPT_mevex512) || + A->getOption().matches(options::OPT_mno_evex512)) + D.Diag(diag::warn_drv_deprecated_custom) + << Name << "because AVX10/256 is not supported and will be removed"; + if (A->getOption().matches(options::OPT_mapx_features_EQ) || A->getOption().matches(options::OPT_mno_apx_features_EQ)) { diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 28aea86139e0d..b74a8631efe0f 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -361,6 +361,15 @@ struct ScalarEnumerationTraits< } }; +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &IO, FormatStyle::EnumTrailingCommaStyle &Value) { + IO.enumCase(Value, "Leave", FormatStyle::ETC_Leave); + IO.enumCase(Value, "Insert", FormatStyle::ETC_Insert); + IO.enumCase(Value, "Remove", FormatStyle::ETC_Remove); + } +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, FormatStyle::IndentExternBlockStyle &Value) { @@ -1042,6 +1051,7 @@ template <> struct MappingTraits { Style.EmptyLineAfterAccessModifier); IO.mapOptional("EmptyLineBeforeAccessModifier", Style.EmptyLineBeforeAccessModifier); + IO.mapOptional("EnumTrailingComma", Style.EnumTrailingComma); IO.mapOptional("ExperimentalAutoDetectBinPacking", Style.ExperimentalAutoDetectBinPacking); IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments); @@ -1558,6 +1568,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.DisableFormat = false; LLVMStyle.EmptyLineAfterAccessModifier = FormatStyle::ELAAMS_Never; LLVMStyle.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_LogicalBlock; + LLVMStyle.EnumTrailingComma = FormatStyle::ETC_Leave; LLVMStyle.ExperimentalAutoDetectBinPacking = false; 
LLVMStyle.FixNamespaceComments = true; LLVMStyle.ForEachMacros.push_back("foreach"); @@ -2203,6 +2214,21 @@ FormatStyle::GetLanguageStyle(FormatStyle::LanguageKind Language) const { namespace { +void replaceToken(const FormatToken &Token, FormatToken *Next, + const SourceManager &SourceMgr, tooling::Replacements &Result, + StringRef Text = "") { + const auto &Tok = Token.Tok; + SourceLocation Start; + if (Next && Next->NewlinesBefore == 0 && Next->isNot(tok::eof)) { + Start = Tok.getLocation(); + Next->WhitespaceRange = Token.WhitespaceRange; + } else { + Start = Token.WhitespaceRange.getBegin(); + } + const auto &Range = CharSourceRange::getCharRange(Start, Tok.getEndLoc()); + cantFail(Result.add(tooling::Replacement(SourceMgr, Range, Text))); +} + class ParensRemover : public TokenAnalyzer { public: ParensRemover(const Environment &Env, const FormatStyle &Style) @@ -2229,20 +2255,8 @@ class ParensRemover : public TokenAnalyzer { continue; for (const auto *Token = Line->First; Token && !Token->Finalized; Token = Token->Next) { - if (!Token->Optional || !Token->isOneOf(tok::l_paren, tok::r_paren)) - continue; - auto *Next = Token->Next; - assert(Next && Next->isNot(tok::eof)); - SourceLocation Start; - if (Next->NewlinesBefore == 0) { - Start = Token->Tok.getLocation(); - Next->WhitespaceRange = Token->WhitespaceRange; - } else { - Start = Token->WhitespaceRange.getBegin(); - } - const auto &Range = - CharSourceRange::getCharRange(Start, Token->Tok.getEndLoc()); - cantFail(Result.add(tooling::Replacement(SourceMgr, Range, " "))); + if (Token->Optional && Token->isOneOf(tok::l_paren, tok::r_paren)) + replaceToken(*Token, Token->Next, SourceMgr, Result, " "); } } } @@ -2331,24 +2345,13 @@ class BracesRemover : public TokenAnalyzer { const auto *NextLine = I + 1 == End ? 
nullptr : I[1]; for (const auto *Token = Line->First; Token && !Token->Finalized; Token = Token->Next) { - if (!Token->Optional) - continue; - if (!Token->isOneOf(tok::l_brace, tok::r_brace)) + if (!Token->Optional || !Token->isOneOf(tok::l_brace, tok::r_brace)) continue; auto *Next = Token->Next; assert(Next || Token == Line->Last); if (!Next && NextLine) Next = NextLine->First; - SourceLocation Start; - if (Next && Next->NewlinesBefore == 0 && Next->isNot(tok::eof)) { - Start = Token->Tok.getLocation(); - Next->WhitespaceRange = Token->WhitespaceRange; - } else { - Start = Token->WhitespaceRange.getBegin(); - } - const auto &Range = - CharSourceRange::getCharRange(Start, Token->Tok.getEndLoc()); - cantFail(Result.add(tooling::Replacement(SourceMgr, Range, ""))); + replaceToken(*Token, Next, SourceMgr, Result); } } } @@ -2400,16 +2403,51 @@ class SemiRemover : public TokenAnalyzer { assert(Next || Token == Line->Last); if (!Next && NextLine) Next = NextLine->First; - SourceLocation Start; - if (Next && Next->NewlinesBefore == 0 && Next->isNot(tok::eof)) { - Start = Token->Tok.getLocation(); - Next->WhitespaceRange = Token->WhitespaceRange; - } else { - Start = Token->WhitespaceRange.getBegin(); + replaceToken(*Token, Next, SourceMgr, Result); + } + } + } +}; + +class EnumTrailingCommaEditor : public TokenAnalyzer { +public: + EnumTrailingCommaEditor(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style) {} + + std::pair + analyze(TokenAnnotator &Annotator, + SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens) override { + AffectedRangeMgr.computeAffectedLines(AnnotatedLines); + tooling::Replacements Result; + editEnumTrailingComma(AnnotatedLines, Result); + return {Result, 0}; + } + +private: + void editEnumTrailingComma(SmallVectorImpl &Lines, + tooling::Replacements &Result) { + const auto &SourceMgr = Env.getSourceManager(); + for (auto *Line : Lines) { + if (!Line->Children.empty()) + editEnumTrailingComma(Line->Children, 
Result); + if (!Line->Affected) + continue; + for (const auto *Token = Line->First; Token && !Token->Finalized; + Token = Token->Next) { + if (Token->isNot(TT_EnumRBrace)) + continue; + const auto *BeforeRBrace = Token->getPreviousNonComment(); + assert(BeforeRBrace); + if (BeforeRBrace->is(TT_EnumLBrace)) // Empty braces. + continue; + if (BeforeRBrace->is(tok::comma)) { + if (Style.EnumTrailingComma == FormatStyle::ETC_Remove) + replaceToken(*BeforeRBrace, BeforeRBrace->Next, SourceMgr, Result); + } else if (Style.EnumTrailingComma == FormatStyle::ETC_Insert) { + cantFail(Result.add(tooling::Replacement( + SourceMgr, BeforeRBrace->Tok.getEndLoc(), 0, ","))); } - const auto &Range = - CharSourceRange::getCharRange(Start, Token->Tok.getEndLoc()); - cantFail(Result.add(tooling::Replacement(SourceMgr, Range, ""))); } } } @@ -3812,6 +3850,13 @@ reformat(const FormatStyle &Style, StringRef Code, }); } + if (Style.EnumTrailingComma != FormatStyle::ETC_Leave) { + Passes.emplace_back([&](const Environment &Env) { + return EnumTrailingCommaEditor(Env, Expanded) + .process(/*SkipAnnotation=*/true); + }); + } + if (Style.FixNamespaceComments) { Passes.emplace_back([&](const Environment &Env) { return NamespaceEndCommentsFixer(Env, Expanded).process(); diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 3808872d227a9..a5c2388bb143d 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -1243,6 +1243,7 @@ struct AdditionalKeywords { kw_unique0 = &IdentTable.get("unique0"); kw_uwire = &IdentTable.get("uwire"); kw_vectored = &IdentTable.get("vectored"); + kw_wait = &IdentTable.get("wait"); kw_wand = &IdentTable.get("wand"); kw_weak0 = &IdentTable.get("weak0"); kw_weak1 = &IdentTable.get("weak1"); @@ -1299,70 +1300,49 @@ struct AdditionalKeywords { // treatment like `showcancelled` or they should be treated as identifiers // like `int` and `logic`. 
VerilogExtraKeywords = std::unordered_set( - {kw_always, kw_always_comb, - kw_always_ff, kw_always_latch, - kw_assert, kw_assign, - kw_assume, kw_automatic, - kw_before, kw_begin, - kw_bins, kw_binsof, - kw_casex, kw_casez, - kw_celldefine, kw_checker, - kw_clocking, kw_constraint, - kw_cover, kw_covergroup, - kw_coverpoint, kw_disable, - kw_dist, kw_edge, - kw_end, kw_endcase, - kw_endchecker, kw_endclass, - kw_endclocking, kw_endfunction, - kw_endgenerate, kw_endgroup, - kw_endinterface, kw_endmodule, - kw_endpackage, kw_endprimitive, - kw_endprogram, kw_endproperty, - kw_endsequence, kw_endspecify, - kw_endtable, kw_endtask, - kw_extends, kw_final, - kw_foreach, kw_forever, - kw_fork, kw_function, - kw_generate, kw_highz0, - kw_highz1, kw_iff, - kw_ifnone, kw_ignore_bins, - kw_illegal_bins, kw_implements, - kw_import, kw_initial, - kw_inout, kw_input, - kw_inside, kw_interconnect, - kw_interface, kw_intersect, - kw_join, kw_join_any, - kw_join_none, kw_large, - kw_let, kw_local, - kw_localparam, kw_macromodule, - kw_matches, kw_medium, - kw_negedge, kw_output, - kw_package, kw_packed, - kw_parameter, kw_posedge, - kw_primitive, kw_priority, - kw_program, kw_property, - kw_pull0, kw_pull1, - kw_pure, kw_rand, - kw_randc, kw_randcase, - kw_randsequence, kw_ref, - kw_repeat, kw_sample, - kw_scalared, kw_sequence, - kw_small, kw_soft, - kw_solve, kw_specify, - kw_specparam, kw_strong0, - kw_strong1, kw_supply0, - kw_supply1, kw_table, - kw_tagged, kw_task, - kw_tri, kw_tri0, - kw_tri1, kw_triand, - kw_trior, kw_trireg, - kw_unique, kw_unique0, - kw_uwire, kw_var, - kw_vectored, kw_wand, - kw_weak0, kw_weak1, - kw_wildcard, kw_wire, - kw_with, kw_wor, - kw_verilogHash, kw_verilogHashHash}); + {kw_always, kw_always_comb, kw_always_ff, + kw_always_latch, kw_assert, kw_assign, + kw_assume, kw_automatic, kw_before, + kw_begin, kw_bins, kw_binsof, + kw_casex, kw_casez, kw_celldefine, + kw_checker, kw_clocking, kw_constraint, + kw_cover, kw_covergroup, kw_coverpoint, + 
kw_disable, kw_dist, kw_edge, + kw_end, kw_endcase, kw_endchecker, + kw_endclass, kw_endclocking, kw_endfunction, + kw_endgenerate, kw_endgroup, kw_endinterface, + kw_endmodule, kw_endpackage, kw_endprimitive, + kw_endprogram, kw_endproperty, kw_endsequence, + kw_endspecify, kw_endtable, kw_endtask, + kw_extends, kw_final, kw_foreach, + kw_forever, kw_fork, kw_function, + kw_generate, kw_highz0, kw_highz1, + kw_iff, kw_ifnone, kw_ignore_bins, + kw_illegal_bins, kw_implements, kw_import, + kw_initial, kw_inout, kw_input, + kw_inside, kw_interconnect, kw_interface, + kw_intersect, kw_join, kw_join_any, + kw_join_none, kw_large, kw_let, + kw_local, kw_localparam, kw_macromodule, + kw_matches, kw_medium, kw_negedge, + kw_output, kw_package, kw_packed, + kw_parameter, kw_posedge, kw_primitive, + kw_priority, kw_program, kw_property, + kw_pull0, kw_pull1, kw_pure, + kw_rand, kw_randc, kw_randcase, + kw_randsequence, kw_ref, kw_repeat, + kw_sample, kw_scalared, kw_sequence, + kw_small, kw_soft, kw_solve, + kw_specify, kw_specparam, kw_strong0, + kw_strong1, kw_supply0, kw_supply1, + kw_table, kw_tagged, kw_task, + kw_tri, kw_tri0, kw_tri1, + kw_triand, kw_trior, kw_trireg, + kw_unique, kw_unique0, kw_uwire, + kw_var, kw_vectored, kw_wait, + kw_wand, kw_weak0, kw_weak1, + kw_wildcard, kw_wire, kw_with, + kw_wor, kw_verilogHash, kw_verilogHashHash}); TableGenExtraKeywords = std::unordered_set({ kw_assert, @@ -1614,6 +1594,7 @@ struct AdditionalKeywords { IdentifierInfo *kw_unique0; IdentifierInfo *kw_uwire; IdentifierInfo *kw_vectored; + IdentifierInfo *kw_wait; IdentifierInfo *kw_wand; IdentifierInfo *kw_weak0; IdentifierInfo *kw_weak1; @@ -1849,8 +1830,12 @@ struct AdditionalKeywords { /// Returns whether \p Tok is a Verilog keyword that opens a block. bool isVerilogBegin(const FormatToken &Tok) const { // `table` is not included since it needs to be treated specially. 
- return !Tok.endsSequence(kw_fork, kw_disable) && - Tok.isOneOf(kw_begin, kw_fork, kw_generate, kw_specify); + if (Tok.isOneOf(kw_begin, kw_generate, kw_specify)) + return true; + if (Tok.isNot(kw_fork)) + return false; + const auto *Prev = Tok.getPreviousNonComment(); + return !(Prev && Prev->isOneOf(kw_disable, kw_wait)); } /// Returns whether \p Tok is a Verilog keyword that closes a block. diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index eed54a11684b5..014b10b206d90 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -1306,15 +1306,12 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete, tok::kw_operator)) { FormatTok->Tok.setKind(tok::identifier); - FormatTok->Tok.setIdentifierInfo(nullptr); } else if (Style.isJavaScript() && FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_operator)) { FormatTok->Tok.setKind(tok::identifier); - FormatTok->Tok.setIdentifierInfo(nullptr); } else if (Style.isTableGen() && !Keywords.isTableGenKeyword(*FormatTok)) { FormatTok->Tok.setKind(tok::identifier); - FormatTok->Tok.setIdentifierInfo(nullptr); } } else if (FormatTok->is(tok::greatergreater)) { FormatTok->Tok.setKind(tok::greater); diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index e7e0b4cfb72a7..ebee5994bfed2 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -453,6 +453,7 @@ static ExprResult calculateConstraintSatisfaction( Sema::InstantiatingTemplate Inst( S, AtomicExpr->getBeginLoc(), Sema::InstantiatingTemplate::ConstraintSubstitution{}, + // FIXME: improve const-correctness of InstantiatingTemplate const_cast(Template), Info, AtomicExpr->getSourceRange()); if (Inst.isInvalid()) @@ -1435,9 +1436,9 @@ void Sema::DiagnoseUnsatisfiedConstraint( } } -const NormalizedConstraint * -Sema::getNormalizedAssociatedConstraints( - NamedDecl 
*ConstrainedDecl, ArrayRef AssociatedConstraints) { +const NormalizedConstraint *Sema::getNormalizedAssociatedConstraints( + const NamedDecl *ConstrainedDecl, + ArrayRef AssociatedConstraints) { // In case the ConstrainedDecl comes from modules, it is necessary to use // the canonical decl to avoid different atomic constraints with the 'same' // declarations. @@ -1461,7 +1462,7 @@ Sema::getNormalizedAssociatedConstraints( } const NormalizedConstraint *clang::getNormalizedAssociatedConstraints( - Sema &S, NamedDecl *ConstrainedDecl, + Sema &S, const NamedDecl *ConstrainedDecl, ArrayRef AssociatedConstraints) { return S.getNormalizedAssociatedConstraints(ConstrainedDecl, AssociatedConstraints); @@ -1527,7 +1528,8 @@ substituteParameterMappings(Sema &S, NormalizedConstraint &N, Sema::InstantiatingTemplate Inst( S, InstLocBegin, Sema::InstantiatingTemplate::ParameterMappingSubstitution{}, - Atomic.ConstraintDecl, {InstLocBegin, InstLocEnd}); + const_cast(Atomic.ConstraintDecl), + {InstLocBegin, InstLocEnd}); if (Inst.isInvalid()) return true; if (S.SubstTemplateArguments(*Atomic.ParameterMapping, MLTAL, SubstArgs)) @@ -1591,7 +1593,7 @@ NormalizedConstraint &NormalizedConstraint::getRHS() const { } std::optional -NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D, +NormalizedConstraint::fromConstraintExprs(Sema &S, const NamedDecl *D, ArrayRef E) { assert(E.size() != 0); auto Conjunction = fromConstraintExpr(S, D, E[0]); @@ -1608,7 +1610,8 @@ NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D, } std::optional -NormalizedConstraint::fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E) { +NormalizedConstraint::fromConstraintExpr(Sema &S, const NamedDecl *D, + const Expr *E) { assert(E != nullptr); // C++ [temp.constr.normal]p1.1 @@ -1637,8 +1640,9 @@ NormalizedConstraint::fromConstraintExpr(Sema &S, NamedDecl *D, const Expr *E) { { Sema::InstantiatingTemplate Inst( S, CSE->getExprLoc(), - 
Sema::InstantiatingTemplate::ConstraintNormalization{}, D, - CSE->getSourceRange()); + Sema::InstantiatingTemplate::ConstraintNormalization{}, + // FIXME: improve const-correctness of InstantiatingTemplate + const_cast(D), CSE->getSourceRange()); if (Inst.isInvalid()) return std::nullopt; // C++ [temp.constr.normal]p1.1 @@ -1726,9 +1730,9 @@ bool FoldExpandedConstraint::AreCompatibleForSubsumption( return false; } -bool Sema::IsAtLeastAsConstrained(NamedDecl *D1, +bool Sema::IsAtLeastAsConstrained(const NamedDecl *D1, MutableArrayRef AC1, - NamedDecl *D2, + const NamedDecl *D2, MutableArrayRef AC2, bool &Result) { #ifndef NDEBUG @@ -1755,7 +1759,7 @@ bool Sema::IsAtLeastAsConstrained(NamedDecl *D1, return false; } - std::pair Key{D1, D2}; + std::pair Key{D1, D2}; auto CacheEntry = SubsumptionCache.find(Key); if (CacheEntry != SubsumptionCache.end()) { Result = CacheEntry->second; @@ -1789,7 +1793,7 @@ bool Sema::IsAtLeastAsConstrained(NamedDecl *D1, } bool Sema::MaybeEmitAmbiguousAtomicConstraintsDiagnostic( - NamedDecl *D1, ArrayRef AC1, NamedDecl *D2, + const NamedDecl *D1, ArrayRef AC1, const NamedDecl *D2, ArrayRef AC2) { if (isSFINAEContext()) @@ -2055,7 +2059,7 @@ FormulaType SubsumptionChecker::Normalize(const NormalizedConstraint &NC) { FormulaType Res; auto Add = [&, this](Clause C) { - // Sort each clause and remove duplicates for faster comparisons + // Sort each clause and remove duplicates for faster comparisons. 
llvm::sort(C); C.erase(llvm::unique(C), C.end()); AddUniqueClauseToFormula(Res, std::move(C)); @@ -2102,9 +2106,9 @@ void SubsumptionChecker::AddUniqueClauseToFormula(Formula &F, Clause C) { F.push_back(C); } -std::optional SubsumptionChecker::Subsumes(NamedDecl *DP, +std::optional SubsumptionChecker::Subsumes(const NamedDecl *DP, ArrayRef P, - NamedDecl *DQ, + const NamedDecl *DQ, ArrayRef Q) { const NormalizedConstraint *PNormalized = getNormalizedAssociatedConstraints(SemaRef, DP, P); diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp index 5f30fae4b7047..da6885ecd0ec5 100644 --- a/clang/lib/StaticAnalyzer/Core/Store.cpp +++ b/clang/lib/StaticAnalyzer/Core/Store.cpp @@ -511,13 +511,9 @@ SVal StoreManager::getLValueElement(QualType elementType, NonLoc Offset, // Only allow non-integer offsets if the base region has no offset itself. // FIXME: This is a somewhat arbitrary restriction. We should be using // SValBuilder here to add the two offsets without checking their types. - if (!isa(Offset)) { - if (isa(BaseRegion->StripCasts())) - return UnknownVal(); - + if (!isa(Offset)) return loc::MemRegionVal(MRMgr.getElementRegion( elementType, Offset, cast(ElemR->getSuperRegion()), Ctx)); - } const llvm::APSInt& OffI = Offset.castAs().getValue(); assert(BaseIdxI.isSigned()); diff --git a/clang/test/Analysis/ArrayBound/assumption-reporting.c b/clang/test/Analysis/ArrayBound/assumption-reporting.c index d687886ada1ae..535e623baa815 100644 --- a/clang/test/Analysis/ArrayBound/assumption-reporting.c +++ b/clang/test/Analysis/ArrayBound/assumption-reporting.c @@ -39,14 +39,9 @@ int assumingBothPointerToMiddle(int arg) { // will speak about the "byte offset" measured from the beginning of the TenElements. 
int *p = TenElements + 2; int a = p[arg]; - // FIXME: The following note does not appear: - // {{Assuming byte offset is non-negative and less than 40, the extent of 'TenElements'}} - // It seems that the analyzer "gives up" modeling this pointer arithmetics - // and says that `p[arg]` is just an UnknownVal (instead of calculating that - // it's equivalent to `TenElements[2+arg]`). + // expected-note@-1 {{Assuming byte offset is non-negative and less than 40, the extent of 'TenElements'}} int b = TenElements[arg]; // This is normal access, and only the lower bound is new. - // expected-note@-1 {{Assuming index is non-negative}} int c = TenElements[arg + 10]; // expected-warning@-1 {{Out of bound access to memory after the end of 'TenElements'}} // expected-note@-2 {{Access of 'TenElements' at an overflowing index, while it holds only 10 'int' elements}} diff --git a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp index 4209db14eaa52..106091b240af6 100644 --- a/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp +++ b/clang/test/Analysis/Checkers/WebKit/ref-cntbl-crtp-base-no-virtual-dtor.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=webkit.RefCntblBaseVirtualDtor -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=webkit.RefCntblBaseVirtualDtor -verify %s -Wno-unnecessary-virtual-specifier #include "mock-types.h" diff --git a/clang/test/Analysis/lvalue_elements.c b/clang/test/Analysis/lvalue_elements.c new file mode 100644 index 0000000000000..73b9c037d80d2 --- /dev/null +++ b/clang/test/Analysis/lvalue_elements.c @@ -0,0 +1,31 @@ +// RUN: %clang_analyze_cc1 -std=c11 -analyzer-checker=debug.ExprInspection -verify %s + +void clang_analyzer_dump(int*); + +const int const_index = 1; +extern int unknown_index; +extern int array[3]; +extern int matrix[3][3]; + +int main(){ + + // expected-warning@+1 {{&Element{array,1 
S64b,int}}} + clang_analyzer_dump(&array[const_index]); + + // expected-warning@+1 {{&Element{array,reg_$1,int}}} + clang_analyzer_dump(&array[unknown_index]); + + // expected-warning@+1 {{&Element{Element{matrix,1 S64b,int[3]},1 S64b,int}}} + clang_analyzer_dump(&matrix[const_index][const_index]); + + // expected-warning@+1 {{&Element{Element{matrix,reg_$1,int[3]},1 S64b,int}}} + clang_analyzer_dump(&matrix[unknown_index][const_index]); + + // expected-warning@+1 {{&Element{Element{matrix,1 S64b,int[3]},reg_$1,int}}} + clang_analyzer_dump(&matrix[const_index][unknown_index]); + + // expected-warning@+1 {{&Element{Element{matrix,reg_$1,int[3]},reg_$1,int}}} + clang_analyzer_dump(&matrix[unknown_index][unknown_index]); + + return 0; +} diff --git a/clang/test/CXX/class/p2-0x.cpp b/clang/test/CXX/class/p2-0x.cpp index 5b39e0ada7e2c..2043486457baf 100644 --- a/clang/test/CXX/class/p2-0x.cpp +++ b/clang/test/CXX/class/p2-0x.cpp @@ -28,7 +28,7 @@ struct C : A { }; // expected-error {{base 'A' is marked 'final'}} namespace Test4 { -struct A final { virtual void func() = 0; }; // expected-warning {{abstract class is marked 'final'}} expected-note {{unimplemented pure virtual method 'func' in 'A'}} +struct A final { virtual void func() = 0; }; // expected-warning {{abstract class is marked 'final'}} expected-note {{unimplemented pure virtual method 'func' in 'A'}} expected-warning {{virtual method 'func' is inside a 'final' class}}} struct B { virtual void func() = 0; }; // expected-note {{unimplemented pure virtual method 'func' in 'C'}} struct C final : B { }; // expected-warning {{abstract class is marked 'final'}} diff --git a/clang/test/CodeGenCXX/debug-info-windows-dtor.cpp b/clang/test/CodeGenCXX/debug-info-windows-dtor.cpp index beea56ce7368b..ffef45b9f7d1b 100644 --- a/clang/test/CodeGenCXX/debug-info-windows-dtor.cpp +++ b/clang/test/CodeGenCXX/debug-info-windows-dtor.cpp @@ -16,7 +16,7 @@ struct AB: A, B { template struct AB; // CHECK: define 
{{.*}}@"??_E?$AB@H@@W3AEPAXI@Z"({{.*}} !dbg [[THUNK_VEC_DEL_DTOR:![0-9]*]] -// CHECK: call {{.*}}@"??_G?$AB@H@@UAEPAXI@Z"({{.*}}) #{{[0-9]*}}, !dbg [[THUNK_LOC:![0-9]*]] +// CHECK: call {{.*}}@"??_E?$AB@H@@UAEPAXI@Z"({{.*}}) #{{[0-9]*}}, !dbg [[THUNK_LOC:![0-9]*]] // CHECK: define // CHECK: [[THUNK_VEC_DEL_DTOR]] = distinct !DISubprogram diff --git a/clang/test/CodeGenCXX/dllexport.cpp b/clang/test/CodeGenCXX/dllexport.cpp index c8ac526f4cbe3..16eaac75e702f 100644 --- a/clang/test/CodeGenCXX/dllexport.cpp +++ b/clang/test/CodeGenCXX/dllexport.cpp @@ -631,7 +631,7 @@ struct __declspec(dllexport) Y { struct __declspec(dllexport) Z { virtual ~Z() {} }; // The scalar deleting dtor does not get exported: -// M32-DAG: define linkonce_odr dso_local x86_thiscallcc ptr @"??_GZ@@UAEPAXI@Z" +// M32-DAG: define weak dso_local x86_thiscallcc ptr @"??_EZ@@UAEPAXI@Z" // The user-defined dtor does get exported: diff --git a/clang/test/CodeGenCXX/microsoft-abi-extern-template.cpp b/clang/test/CodeGenCXX/microsoft-abi-extern-template.cpp index ea12aa64ae305..67df330bc3263 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-extern-template.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-extern-template.cpp @@ -4,7 +4,7 @@ // own copy the vftable when emitting the available externally constructor. 
// CHECK: @"??_7?$Foo@H@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ -// CHECK-SAME: ptr @"??_G?$Foo@H@@UEAAPEAXI@Z" +// CHECK-SAME: ptr @"??_E?$Foo@H@@UEAAPEAXI@Z" // CHECK-SAME: ] }, comdat // CHECK-LABEL: define dso_local noundef ptr @"?f@@YAPEAU?$Foo@H@@XZ"() diff --git a/clang/test/CodeGenCXX/microsoft-abi-structors.cpp b/clang/test/CodeGenCXX/microsoft-abi-structors.cpp index 07abc3d065e5e..2ff7391ec8c8f 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-structors.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-structors.cpp @@ -52,7 +52,8 @@ struct C { // DTORS: store ptr %{{.*}}, ptr %[[RETVAL:retval]] // DTORS: %[[SHOULD_DELETE_VALUE:[0-9a-z._]+]] = load i32, ptr %[[SHOULD_DELETE_VAR]] // DTORS: call x86_thiscallcc void @"??1C@basic@@UAE@XZ"(ptr {{[^,]*}} %[[THIS:[0-9a-z]+]]) -// DTORS-NEXT: %[[CONDITION:[0-9]+]] = icmp eq i32 %[[SHOULD_DELETE_VALUE]], 0 +// DTORS-NEXT: %[[AND:[0-9]+]] = and i32 %[[SHOULD_DELETE_VALUE]], 1 +// DTORS-NEXT: %[[CONDITION:[0-9]+]] = icmp eq i32 %[[AND]], 0 // DTORS-NEXT: br i1 %[[CONDITION]], label %[[CONTINUE_LABEL:[0-9a-z._]+]], label %[[CALL_DELETE_LABEL:[0-9a-z._]+]] // // DTORS: [[CALL_DELETE_LABEL]] @@ -166,7 +167,7 @@ void foo() { // DTORS2-LABEL: define linkonce_odr dso_local x86_thiscallcc ptr @"??_EC@dtor_in_second_nvbase@@W3AEPAXI@Z"(ptr %this, i32 %should_call_delete) // Do an adjustment from B* to C*. 
// DTORS2: getelementptr i8, ptr %{{.*}}, i32 -4 -// DTORS2: %[[CALL:.*]] = tail call x86_thiscallcc ptr @"??_GC@dtor_in_second_nvbase@@UAEPAXI@Z" +// DTORS2: %[[CALL:.*]] = tail call x86_thiscallcc ptr @"??_EC@dtor_in_second_nvbase@@UAEPAXI@Z" // DTORS2: ret ptr %[[CALL]] } diff --git a/clang/test/CodeGenCXX/microsoft-abi-thunks.cpp b/clang/test/CodeGenCXX/microsoft-abi-thunks.cpp index 38aa81253ccad..83ec158ff7f51 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-thunks.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-thunks.cpp @@ -63,8 +63,7 @@ C::C() {} // Emits vftable and forces thunk generation. // CODEGEN-LABEL: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??_EC@@W3AEPAXI@Z"(ptr noundef %this, i32 noundef %should_call_delete) {{.*}} comdat // CODEGEN: getelementptr i8, ptr {{.*}}, i32 -4 -// FIXME: should actually call _EC, not _GC. -// CODEGEN: call x86_thiscallcc noundef ptr @"??_GC@@UAEPAXI@Z" +// CODEGEN: call x86_thiscallcc noundef ptr @"??_EC@@UAEPAXI@Z" // CODEGEN: ret // CODEGEN-LABEL: define linkonce_odr dso_local x86_thiscallcc void @"?public_f@C@@W3AEXXZ"(ptr diff --git a/clang/test/CodeGenCXX/microsoft-abi-vftables.cpp b/clang/test/CodeGenCXX/microsoft-abi-vftables.cpp index bc278bdb847fc..7ceb15e40e582 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-vftables.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-vftables.cpp @@ -8,38 +8,38 @@ struct S { virtual ~S(); } s; -// RTTI-DAG: [[VTABLE_S:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4S@@6B@", ptr @"??_GS@@UAEPAXI@Z"] }, comdat($"??_7S@@6B@") +// RTTI-DAG: [[VTABLE_S:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4S@@6B@", ptr @"??_ES@@UAEPAXI@Z"] }, comdat($"??_7S@@6B@") // RTTI-DAG: @"??_7S@@6B@" = unnamed_addr alias ptr, getelementptr inbounds ({ [2 x ptr] }, ptr [[VTABLE_S]], i32 0, i32 0, i32 1) -// NO-RTTI-DAG: @"??_7S@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_GS@@UAEPAXI@Z"] } +// 
NO-RTTI-DAG: @"??_7S@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_ES@@UAEPAXI@Z"] } struct __declspec(dllimport) U { virtual ~U(); } u; -// RTTI-DAG: [[VTABLE_U:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4U@@6B@", ptr @"??_GU@@UAEPAXI@Z"] } +// RTTI-DAG: [[VTABLE_U:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4U@@6B@", ptr @"??_EU@@UAEPAXI@Z"] } // RTTI-DAG: @"??_SU@@6B@" = unnamed_addr alias ptr, getelementptr inbounds ({ [2 x ptr] }, ptr [[VTABLE_U]], i32 0, i32 0, i32 1) -// NO-RTTI-DAG: @"??_SU@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_GU@@UAEPAXI@Z"] } +// NO-RTTI-DAG: @"??_SU@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_EU@@UAEPAXI@Z"] } struct __declspec(dllexport) V { virtual ~V(); } v; -// RTTI-DAG: [[VTABLE_V:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4V@@6B@", ptr @"??_GV@@UAEPAXI@Z"] }, comdat($"??_7V@@6B@") +// RTTI-DAG: [[VTABLE_V:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4V@@6B@", ptr @"??_EV@@UAEPAXI@Z"] }, comdat($"??_7V@@6B@") // RTTI-DAG: @"??_7V@@6B@" = dllexport unnamed_addr alias ptr, getelementptr inbounds ({ [2 x ptr] }, ptr [[VTABLE_V]], i32 0, i32 0, i32 1) -// NO-RTTI-DAG: @"??_7V@@6B@" = weak_odr dllexport unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_GV@@UAEPAXI@Z"] } +// NO-RTTI-DAG: @"??_7V@@6B@" = weak_odr dllexport unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_EV@@UAEPAXI@Z"] } namespace { struct W { virtual ~W() {} } w; } -// RTTI-DAG: [[VTABLE_W:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4W@?A0x{{[^@]*}}@@6B@", ptr @"??_GW@?A0x{{[^@]*}}@@UAEPAXI@Z"] } +// RTTI-DAG: [[VTABLE_W:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4W@?A0x{{[^@]*}}@@6B@", ptr @"??_EW@?A0x{{[^@]*}}@@UAEPAXI@Z"] } // RTTI-DAG: 
@"??_7W@?A0x{{[^@]*}}@@6B@" = internal unnamed_addr alias ptr, getelementptr inbounds ({ [2 x ptr] }, ptr [[VTABLE_W]], i32 0, i32 0, i32 1) -// NO-RTTI-DAG: @"??_7W@?A0x{{[^@]*}}@@6B@" = internal unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_GW@?A0x{{[^@]*}}@@UAEPAXI@Z"] } +// NO-RTTI-DAG: @"??_7W@?A0x{{[^@]*}}@@6B@" = internal unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_EW@?A0x{{[^@]*}}@@UAEPAXI@Z"] } struct X {}; template struct Y : virtual X { @@ -49,7 +49,7 @@ template struct Y : virtual X { extern template class Y; template Y::Y(); -// RTTI-DAG: [[VTABLE_Y:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4?$Y@H@@6B@", ptr @"??_G?$Y@H@@UAEPAXI@Z"] }, comdat($"??_7?$Y@H@@6B@") +// RTTI-DAG: [[VTABLE_Y:@.*]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4?$Y@H@@6B@", ptr @"??_E?$Y@H@@UAEPAXI@Z"] }, comdat($"??_7?$Y@H@@6B@") // RTTI-DAG: @"??_7?$Y@H@@6B@" = unnamed_addr alias ptr, getelementptr inbounds ({ [2 x ptr] }, ptr [[VTABLE_Y]], i32 0, i32 0, i32 1) -// NO-RTTI-DAG: @"??_7?$Y@H@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_G?$Y@H@@UAEPAXI@Z"] }, comdat +// NO-RTTI-DAG: @"??_7?$Y@H@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_E?$Y@H@@UAEPAXI@Z"] }, comdat diff --git a/clang/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp b/clang/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp index b54775f6c5dd0..7e9dce18b2797 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-virtual-inheritance.cpp @@ -80,6 +80,15 @@ B::~B() { // CHECK2: call x86_thiscallcc void @"??1VBase@@UAE@XZ"(ptr {{[^,]*}} %[[VBASE_i8]]) // CHECK2: ret + // CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??0B@test2@@QAE@XZ" + // CHECK2: (ptr {{[^,]*}} returned align 4 dereferenceable(4) %this, i32 noundef %is_most_derived) + // CHECK2: call x86_thiscallcc 
noundef ptr @"??0A@test2@@QAE@XZ"(ptr {{[^,]*}} %{{.*}}) + // CHECK2: ret + + // CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??_GD@pr36921@@UAEPAXI@Z"( + // CHECK2: %[[THIS_RELOAD:.*]] = load ptr, ptr + // CHECK2: %[[THIS_ADJ_i8:.*]] = getelementptr inbounds i8, ptr %[[THIS_RELOAD]], i32 -4 + // CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??_GB@@UAEPAXI@Z" // CHECK2: store ptr %{{.*}}, ptr %[[THIS_ADDR:.*]], align 4 // CHECK2: %[[THIS_i8:.*]] = getelementptr inbounds i8, ptr %[[THIS_PARAM_i8:.*]], i32 -8 @@ -293,11 +302,6 @@ void callC() { C x; } // CHECK: call x86_thiscallcc noundef ptr @"??0A@test2@@QAE@XZ"(ptr {{[^,]*}} %{{.*}}) // CHECK: ret -// CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??0B@test2@@QAE@XZ" -// CHECK2: (ptr {{[^,]*}} returned align 4 dereferenceable(4) %this, i32 noundef %is_most_derived) -// CHECK2: call x86_thiscallcc noundef ptr @"??0A@test2@@QAE@XZ"(ptr {{[^,]*}} %{{.*}}) -// CHECK2: ret - } namespace test3 { @@ -480,9 +484,6 @@ struct B { struct C : virtual B {}; struct D : virtual A, C {}; D d; -// CHECK2-LABEL: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??_GD@pr36921@@UAEPAXI@Z"( -// CHECK2: %[[THIS_RELOAD:.*]] = load ptr, ptr -// CHECK2: %[[THIS_ADJ_i8:.*]] = getelementptr inbounds i8, ptr %[[THIS_RELOAD]], i32 -4 } namespace issue_60465 { diff --git a/clang/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-vdtors.cpp b/clang/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-vdtors.cpp index a407766f8ed9f..74150b0ecb535 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-vdtors.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-vtables-multiple-nonvirtual-inheritance-vdtors.cpp @@ -12,18 +12,18 @@ struct B { struct C : A, B { // CHECK-LABEL: VFTable for 'A' in 'C' (2 entries). 
- // CHECK-NEXT: 0 | C::~C() [scalar deleting] + // CHECK-NEXT: 0 | C::~C() [vector deleting] // CHECK-NEXT: 1 | void A::z1() // CHECK-LABEL: VFTable for 'B' in 'C' (1 entry). - // CHECK-NEXT: 0 | C::~C() [scalar deleting] + // CHECK-NEXT: 0 | C::~C() [vector deleting] // CHECK-NEXT: [this adjustment: -4 non-virtual] // CHECK-LABEL: Thunks for 'C::~C()' (1 entry). // CHECK-NEXT: 0 | [this adjustment: -4 non-virtual] // CHECK-LABEL: VFTable indices for 'C' (1 entry). - // CHECK-NEXT: 0 | C::~C() [scalar deleting] + // CHECK-NEXT: 0 | C::~C() [vector deleting] virtual ~C(); }; @@ -41,7 +41,7 @@ struct E : D, B { // CHECK-NEXT: 0 | void D::z4() // CHECK-LABEL: VFTable for 'B' in 'E' (1 entry). - // CHECK-NEXT: 0 | E::~E() [scalar deleting] + // CHECK-NEXT: 0 | E::~E() [vector deleting] // CHECK-NEXT: [this adjustment: -4 non-virtual] // CHECK-LABEL: Thunks for 'E::~E()' (1 entry). @@ -49,7 +49,7 @@ struct E : D, B { // CHECK-LABEL: VFTable indices for 'E' (1 entry). // CHECK-NEXT: -- accessible via vfptr at offset 4 -- - // CHECK-NEXT: 0 | E::~E() [scalar deleting] + // CHECK-NEXT: 0 | E::~E() [vector deleting] }; void build_vftable(E *obj) { delete obj; } @@ -61,7 +61,7 @@ struct F : D, B { // CHECK-NEXT: 0 | void D::z4() // CHECK-LABEL: VFTable for 'B' in 'F' (1 entry). - // CHECK-NEXT: 0 | F::~F() [scalar deleting] + // CHECK-NEXT: 0 | F::~F() [vector deleting] // CHECK-NEXT: [this adjustment: -4 non-virtual] // CHECK-LABEL: Thunks for 'F::~F()' (1 entry). @@ -69,7 +69,7 @@ struct F : D, B { // CHECK-LABEL: VFTable indices for 'F' (1 entry). // CHECK-NEXT: -- accessible via vfptr at offset 4 -- - // CHECK-NEXT: 0 | F::~F() [scalar deleting] + // CHECK-NEXT: 0 | F::~F() [vector deleting] }; void build_vftable(F *obj) { delete obj; } @@ -79,7 +79,7 @@ struct G : F { // CHECK-NEXT: 0 | void D::z4() // CHECK-LABEL: VFTable for 'B' in 'F' in 'G' (1 entry). 
- // CHECK-NEXT: 0 | G::~G() [scalar deleting] + // CHECK-NEXT: 0 | G::~G() [vector deleting] // CHECK-NEXT: [this adjustment: -4 non-virtual] // CHECK-LABEL: Thunks for 'G::~G()' (1 entry). @@ -87,7 +87,7 @@ struct G : F { // CHECK-LABEL: VFTable indices for 'G' (1 entry). // CHECK-NEXT: -- accessible via vfptr at offset 4 -- - // CHECK-NEXT: 0 | G::~G() [scalar deleting] + // CHECK-NEXT: 0 | G::~G() [vector deleting] virtual ~G(); }; diff --git a/clang/test/CodeGenCXX/microsoft-abi-vtables-return-thunks.cpp b/clang/test/CodeGenCXX/microsoft-abi-vtables-return-thunks.cpp index 5030a5dcd2a50..1a589370d3a74 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-vtables-return-thunks.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-vtables-return-thunks.cpp @@ -213,6 +213,6 @@ struct C : virtual B { C *f(); }; C c; // VFTABLES-LABEL: VFTable indices for 'pr34302::C' (2 entries). // VFTABLES-NEXT: -- accessible via vbtable index 1, vfptr at offset 0 -- -// VFTABLES-NEXT: 0 | pr34302::C::~C() [scalar deleting] +// VFTABLES-NEXT: 0 | pr34302::C::~C() [vector deleting] // VFTABLES-NEXT: 2 | C *pr34302::C::f() } diff --git a/clang/test/CodeGenCXX/microsoft-abi-vtables-single-inheritance.cpp b/clang/test/CodeGenCXX/microsoft-abi-vtables-single-inheritance.cpp index b0bf927d38f7c..c95202e8cc253 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-vtables-single-inheritance.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-vtables-single-inheritance.cpp @@ -44,10 +44,10 @@ void use(B *obj) { obj->f(); } struct C { // CHECK-LABEL: VFTable for 'C' (2 entries) - // CHECK-NEXT: 0 | C::~C() [scalar deleting] + // CHECK-NEXT: 0 | C::~C() [vector deleting] // CHECK-NEXT: 1 | void C::f() // CHECK-LABEL: VFTable indices for 'C' (2 entries). 
- // CHECK-NEXT: 0 | C::~C() [scalar deleting] + // CHECK-NEXT: 0 | C::~C() [vector deleting] // CHECK-NEXT: 1 | void C::f() virtual ~C(); @@ -60,10 +60,10 @@ void use(C *obj) { obj->f(); } struct D { // CHECK-LABEL: VFTable for 'D' (2 entries) // CHECK-NEXT: 0 | void D::f() - // CHECK-NEXT: 1 | D::~D() [scalar deleting] + // CHECK-NEXT: 1 | D::~D() [vector deleting] // CHECK-LABEL: VFTable indices for 'D' (2 entries) // CHECK-NEXT: 0 | void D::f() - // CHECK-NEXT: 1 | D::~D() [scalar deleting] + // CHECK-NEXT: 1 | D::~D() [vector deleting] virtual void f(); virtual ~D(); @@ -77,10 +77,10 @@ struct E : A { // CHECK-NEXT: 0 | void A::f() // CHECK-NEXT: 1 | void A::g() // CHECK-NEXT: 2 | void A::h() - // CHECK-NEXT: 3 | E::~E() [scalar deleting] + // CHECK-NEXT: 3 | E::~E() [vector deleting] // CHECK-NEXT: 4 | void E::i() // CHECK-LABEL: VFTable indices for 'E' (2 entries). - // CHECK-NEXT: 3 | E::~E() [scalar deleting] + // CHECK-NEXT: 3 | E::~E() [vector deleting] // CHECK-NEXT: 4 | void E::i() // ~E would be the key method, but it isn't used, and MS ABI has no key @@ -98,10 +98,10 @@ struct F : A { // CHECK-NEXT: 1 | void A::g() // CHECK-NEXT: 2 | void A::h() // CHECK-NEXT: 3 | void F::i() - // CHECK-NEXT: 4 | F::~F() [scalar deleting] + // CHECK-NEXT: 4 | F::~F() [vector deleting] // CHECK-LABEL: VFTable indices for 'F' (2 entries). // CHECK-NEXT: 3 | void F::i() - // CHECK-NEXT: 4 | F::~F() [scalar deleting] + // CHECK-NEXT: 4 | F::~F() [vector deleting] virtual void i(); virtual ~F(); @@ -115,12 +115,12 @@ struct G : E { // CHECK-NEXT: 0 | void G::f() // CHECK-NEXT: 1 | void A::g() // CHECK-NEXT: 2 | void A::h() - // CHECK-NEXT: 3 | G::~G() [scalar deleting] + // CHECK-NEXT: 3 | G::~G() [vector deleting] // CHECK-NEXT: 4 | void E::i() // CHECK-NEXT: 5 | void G::j() // CHECK-LABEL: VFTable indices for 'G' (3 entries). 
// CHECK-NEXT: 0 | void G::f() - // CHECK-NEXT: 3 | G::~G() [scalar deleting] + // CHECK-NEXT: 3 | G::~G() [vector deleting] // CHECK-NEXT: 5 | void G::j() virtual void f(); // overrides A::f() diff --git a/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance-vtordisps.cpp b/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance-vtordisps.cpp index c5ce69f5cbcac..be9f281560dcf 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance-vtordisps.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance-vtordisps.cpp @@ -57,7 +57,7 @@ struct A : virtual V1 { // CHECK-LABEL: VFTable for 'V1' in 'simple::A' (2 entries). // CHECK-NEXT: 0 | void simple::A::f() // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] - // CHECK-NEXT: 1 | simple::A::~A() [scalar deleting] + // CHECK-NEXT: 1 | simple::A::~A() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: Thunks for 'simple::A::~A()' (1 entry). @@ -79,7 +79,7 @@ void use(A *obj) { obj->f(); } struct B : virtual V3 { // CHECK-LABEL: VFTable for 'Z' in 'V3' in 'simple::B' (2 entries). // CHECK-NEXT: 0 | void Z::g() - // CHECK-NEXT: 1 | simple::B::~B() [scalar deleting] + // CHECK-NEXT: 1 | simple::B::~B() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: Thunks for 'simple::B::~B()' (1 entry). @@ -88,7 +88,7 @@ struct B : virtual V3 { // CHECK-LABEL: VFTable for 'V2' in 'V3' in 'simple::B' (2 entries). // CHECK-NEXT: 0 | void simple::B::f() // CHECK-NEXT: [this adjustment: vtordisp at -12, 0 non-virtual] - // CHECK-NEXT: 1 | simple::B::~B() [scalar deleting] + // CHECK-NEXT: 1 | simple::B::~B() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -12, -8 non-virtual] // CHECK-LABEL: Thunks for 'simple::B::~B()' (1 entry). 
@@ -115,7 +115,7 @@ void use(B *obj) { obj->f(); } struct C : virtual V4 { // CHECK-LABEL: VFTable for 'Z' in 'V4' in 'simple::C' (2 entries). // CHECK-NEXT: 0 | void Z::g() - // CHECK-NEXT: 1 | simple::C::~C() [scalar deleting] + // CHECK-NEXT: 1 | simple::C::~C() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: Thunks for 'simple::C::~C()' (1 entry). @@ -124,7 +124,7 @@ struct C : virtual V4 { // CHECK-LABEL: VFTable for 'V1' in 'V4' in 'simple::C' (2 entries). // CHECK-NEXT: 0 | void simple::C::f() // CHECK-NEXT: [this adjustment: vtordisp at -12, 0 non-virtual] - // CHECK-NEXT: 1 | simple::C::~C() [scalar deleting] + // CHECK-NEXT: 1 | simple::C::~C() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -12, -8 non-virtual] // CHECK-LABEL: Thunks for 'simple::C::~C()' (1 entry). @@ -136,7 +136,7 @@ struct C : virtual V4 { // CHECK-LABEL: VFTable for 'V2' in 'V4' in 'simple::C' (2 entries). // CHECK-NEXT: 0 | void simple::C::f() // CHECK-NEXT: [this adjustment: vtordisp at -16, -4 non-virtual] - // CHECK-NEXT: 1 | simple::C::~C() [scalar deleting] + // CHECK-NEXT: 1 | simple::C::~C() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -16, -12 non-virtual] // CHECK-LABEL: Thunks for 'simple::C::~C()' (1 entry). @@ -162,7 +162,7 @@ class D : B { // CHECK-LABEL: VFTable for 'V2' in 'V3' in 'simple::B' in 'simple::D' (2 entries). // CHECK-NEXT: 0 | void simple::B::f() // CHECK-NEXT: [this adjustment: vtordisp at -12, -4 non-virtual] - // CHECK-NEXT: 1 | simple::D::~D() [scalar deleting] + // CHECK-NEXT: 1 | simple::D::~D() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -12, -8 non-virtual] D(); int z; @@ -180,12 +180,12 @@ struct F : virtual E { // CHECK-LABEL: VFTable for 'Z' in 'V3' in 'simple::E' in 'simple::F' (2 entries). 
// CHECK-NEXT: 0 | void simple::F::g() // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] - // CHECK-NEXT: 1 | simple::F::~F() [scalar deleting] + // CHECK-NEXT: 1 | simple::F::~F() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: VFTable for 'V2' in 'V3' in 'simple::E' in 'simple::F' (2 entries). // CHECK-NEXT: 0 | void simple::E::f() - // CHECK-NEXT: 1 | simple::F::~F() [scalar deleting] + // CHECK-NEXT: 1 | simple::F::~F() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -12, -8 non-virtual] F(); @@ -202,12 +202,12 @@ struct G : F { // CHECK-LABEL: VFTable for 'Z' in 'V3' in 'simple::E' in 'simple::F' in 'simple::G' (2 entries). // CHECK-NEXT: 0 | void simple::F::g() // CHECK-NEXT: [this adjustment: vtordisp at -4, -4 non-virtual] - // CHECK-NEXT: 1 | simple::G::~G() [scalar deleting] + // CHECK-NEXT: 1 | simple::G::~G() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: VFTable for 'V2' in 'V3' in 'simple::E' in 'simple::F' in 'simple::G' (2 entries). // CHECK-NEXT: 0 | void simple::E::f() - // CHECK-NEXT: 1 | simple::G::~G() [scalar deleting] + // CHECK-NEXT: 1 | simple::G::~G() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -12, -8 non-virtual] G(); @@ -240,7 +240,7 @@ struct A : virtual simple::A { // CHECK-NEXT: 0 | void simple::A::f() // CHECK-NEXT: [this adjustment: vtordisp at -4, vbptr at 8 to the left, // CHECK-NEXT: vboffset at 8 in the vbtable, 8 non-virtual] - // CHECK-NEXT: 1 | extended::A::~A() [scalar deleting] + // CHECK-NEXT: 1 | extended::A::~A() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: Thunks for 'void simple::A::f()' (1 entry). @@ -265,7 +265,7 @@ struct B : virtual simple::A { // CHECK-LABEL: VFTable for 'V1' in 'simple::A' in 'extended::B' (2 entries). // ... 
- // CHECK: 1 | extended::B::~B() [scalar deleting] + // CHECK: 1 | extended::B::~B() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: Thunks for 'void simple::A::f()' (1 entry). @@ -353,7 +353,7 @@ struct G : virtual simple::A { // CHECK-NEXT: 0 | void simple::A::f() // CHECK-NEXT: [this adjustment: vtordisp at -4, vbptr at 8 to the left, // CHECK-NEXT: vboffset at 8 in the vbtable, 8 non-virtual] - // CHECK-NEXT: 1 | extended::G::~G() [scalar deleting] + // CHECK-NEXT: 1 | extended::G::~G() [vector deleting] // CHECK-NEXT: [this adjustment: vtordisp at -4, 0 non-virtual] // CHECK-LABEL: Thunks for 'void simple::A::f()' (1 entry). @@ -374,7 +374,7 @@ void use(G *obj) { obj->g(); } struct H : Z, A { // CHECK-LABEL: VFTable for 'Z' in 'extended::H' (2 entries). // CHECK-NEXT: 0 | void Z::g() - // CHECK-NEXT: 1 | extended::H::~H() [scalar deleting] + // CHECK-NEXT: 1 | extended::H::~H() [vector deleting] // CHECK-LABEL: VFTable for 'V1' in 'simple::A' in 'extended::A' in 'extended::H' (2 entries). // CHECK-NEXT: 0 | void simple::A::f() diff --git a/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp b/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp index 257ba270291c8..e5e6ea5f42c1c 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-vtables-virtual-inheritance.cpp @@ -492,7 +492,7 @@ struct X { struct Y : virtual X { // CHECK-LABEL: VFTable for 'vdtors::X' in 'vdtors::Y' (2 entries). - // CHECK-NEXT: 0 | vdtors::Y::~Y() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::Y::~Y() [vector deleting] // CHECK-NEXT: 1 | void vdtors::X::zzz() // CHECK-NOT: Thunks for 'vdtors::Y::~Y()' @@ -515,7 +515,7 @@ struct U : virtual W { // CHECK-NEXT: 0 | void vdtors::Z::z() // CHECK-LABEL: VFTable for 'vdtors::X' in 'vdtors::W' in 'vdtors::U' (2 entries). 
- // CHECK-NEXT: 0 | vdtors::U::~U() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::U::~U() [vector deleting] // CHECK-NEXT: [this adjustment: -4 non-virtual] // CHECK-NEXT: 1 | void vdtors::X::zzz() @@ -524,7 +524,7 @@ struct U : virtual W { // CHECK-LABEL: VFTable indices for 'vdtors::U' (1 entry). // CHECK-NEXT: -- accessible via vbtable index 1, vfptr at offset 4 -- - // CHECK-NEXT: 0 | vdtors::U::~U() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::U::~U() [vector deleting] virtual ~U(); }; @@ -536,7 +536,7 @@ struct V : virtual W { // CHECK-NEXT: 0 | void vdtors::Z::z() // CHECK-LABEL: VFTable for 'vdtors::X' in 'vdtors::W' in 'vdtors::V' (2 entries). - // CHECK-NEXT: 0 | vdtors::V::~V() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::V::~V() [vector deleting] // CHECK-NEXT: [this adjustment: -4 non-virtual] // CHECK-NEXT: 1 | void vdtors::X::zzz() @@ -545,7 +545,7 @@ struct V : virtual W { // CHECK-LABEL: VFTable indices for 'vdtors::V' (1 entry). // CHECK-NEXT: -- accessible via vbtable index 1, vfptr at offset 4 -- - // CHECK-NEXT: 0 | vdtors::V::~V() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::V::~V() [vector deleting] }; V v; @@ -557,7 +557,7 @@ struct T : virtual X { struct P : T, Y { // CHECK-LABEL: VFTable for 'vdtors::X' in 'vdtors::T' in 'vdtors::P' (2 entries). - // CHECK-NEXT: 0 | vdtors::P::~P() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::P::~P() [vector deleting] // CHECK-NEXT: 1 | void vdtors::X::zzz() // CHECK-NOT: Thunks for 'vdtors::P::~P()' @@ -574,18 +574,18 @@ struct Q { // PR19172: Yet another diamond we miscompiled. struct R : virtual Q, X { // CHECK-LABEL: VFTable for 'vdtors::Q' in 'vdtors::R' (1 entry). - // CHECK-NEXT: 0 | vdtors::R::~R() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::R::~R() [vector deleting] // CHECK-NEXT: [this adjustment: -8 non-virtual] // CHECK-LABEL: Thunks for 'vdtors::R::~R()' (1 entry). 
// CHECK-NEXT: 0 | [this adjustment: -8 non-virtual] // CHECK-LABEL: VFTable for 'vdtors::X' in 'vdtors::R' (2 entries). - // CHECK-NEXT: 0 | vdtors::R::~R() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::R::~R() [vector deleting] // CHECK-NEXT: 1 | void vdtors::X::zzz() // CHECK-LABEL: VFTable indices for 'vdtors::R' (1 entry). - // CHECK-NEXT: 0 | vdtors::R::~R() [scalar deleting] + // CHECK-NEXT: 0 | vdtors::R::~R() [vector deleting] virtual ~R(); }; diff --git a/clang/test/CodeGenCXX/microsoft-no-rtti-data.cpp b/clang/test/CodeGenCXX/microsoft-no-rtti-data.cpp index 069f0226ab948..c8e374e51a031 100644 --- a/clang/test/CodeGenCXX/microsoft-no-rtti-data.cpp +++ b/clang/test/CodeGenCXX/microsoft-no-rtti-data.cpp @@ -2,7 +2,7 @@ // vftable shouldn't have RTTI data in it. // CHECK-NOT: @"??_R4S@@6B@" -// CHECK: @"??_7S@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_GS@@UAEPAXI@Z"] }, comdat +// CHECK: @"??_7S@@6B@" = linkonce_odr unnamed_addr constant { [1 x ptr] } { [1 x ptr] [ptr @"??_ES@@UAEPAXI@Z"] }, comdat struct type_info; namespace std { using ::type_info; } diff --git a/clang/test/CodeGenCXX/microsoft-vector-deleting-dtors.cpp b/clang/test/CodeGenCXX/microsoft-vector-deleting-dtors.cpp new file mode 100644 index 0000000000000..ebff4f6a851b0 --- /dev/null +++ b/clang/test/CodeGenCXX/microsoft-vector-deleting-dtors.cpp @@ -0,0 +1,152 @@ +// RUN: %clang_cc1 -emit-llvm %s -triple=x86_64-pc-windows-msvc -o - | FileCheck --check-prefixes=X64,CHECK %s +// RUN: %clang_cc1 -emit-llvm %s -triple=i386-pc-windows-msvc -o - | FileCheck --check-prefixes=X86,CHECK %s + +struct Bird { + virtual ~Bird(); +}; + +struct Parrot : public Bird { +// X64: @[[ParrotVtable:[0-9]+]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4Parrot@@6B@", ptr @"??_EParrot@@UEAAPEAXI@Z"] }, comdat($"??_7Parrot@@6B@") +// X86: @[[ParrotVtable:[0-9]+]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4Parrot@@6B@", ptr 
@"??_EParrot@@UAEPAXI@Z"] }, comdat($"??_7Parrot@@6B@") +// X64: @[[Bird:[0-9]+]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4Bird@@6B@", ptr @"??_EBird@@UEAAPEAXI@Z"] }, comdat($"??_7Bird@@6B@") +// X86: @[[Bird:[0-9]+]] = private unnamed_addr constant { [2 x ptr] } { [2 x ptr] [ptr @"??_R4Bird@@6B@", ptr @"??_EBird@@UAEPAXI@Z"] }, comdat($"??_7Bird@@6B@") + virtual ~Parrot() {} +}; + +Bird::~Bird() {} + +// For the weird bird we first emit scalar deleting destructor, then find out +// that we need vector deleting destructor and remove the alias. +struct JustAWeirdBird { + virtual ~JustAWeirdBird() {} + + bool doSmth(int n) { + JustAWeirdBird *c = new JustAWeirdBird[n]; + + delete[] c; + return true; + } +}; + +// Vector deleting dtor for Bird is an alias because no new Bird[] expressions +// in the TU. +// X64: @"??_EBird@@UEAAPEAXI@Z" = weak dso_local unnamed_addr alias ptr (ptr, i32), ptr @"??_GBird@@UEAAPEAXI@Z" +// X86: @"??_EBird@@UAEPAXI@Z" = weak dso_local unnamed_addr alias ptr (ptr, i32), ptr @"??_GBird@@UAEPAXI@Z" +// No scalar destructor for Parrot. +// CHECK-NOT: @"??_GParrot" +// No vector destructor definition for Bird. +// CHECK-NOT: define{{.*}}@"??_EBird" +// No scalar deleting dtor for JustAWeirdBird. 
+// CHECK-NOT: @"??_GJustAWeirdBird" + +void dealloc(Bird *p) { + delete[] p; +} + +Bird* alloc() { + Parrot* P = new Parrot[38]; + return P; +} + +void bar() { + dealloc(alloc()); + + JustAWeirdBird B; + B.doSmth(38); +} + +// CHECK-LABEL: define dso_local void @{{.*}}dealloc{{.*}}( +// CHECK-SAME: ptr noundef %[[PTR:.*]]) +// CHECK: entry: +// CHECK-NEXT: %[[PTRADDR:.*]] = alloca ptr +// CHECK-NEXT: store ptr %[[PTR]], ptr %[[PTRADDR]] +// CHECK-NEXT: %[[LPTR:.*]] = load ptr, ptr %[[PTRADDR]] +// CHECK-NEXT: %[[ISNULL:.*]] = icmp eq ptr %[[LPTR]], null +// CHECK-NEXT: br i1 %[[ISNULL]], label %delete.end, label %delete.notnull +// CHECK: delete.notnull: +// X64-NEXT: %[[COOKIEGEP:.*]] = getelementptr inbounds i8, ptr %[[LPTR]], i64 -8 +// X86-NEXT: %[[COOKIEGEP:.*]] = getelementptr inbounds i8, ptr %[[LPTR]], i32 -4 +// X64-NEXT: %[[HOWMANY:.*]] = load i64, ptr %[[COOKIEGEP]] +// X86-NEXT: %[[HOWMANY:.*]] = load i32, ptr %[[COOKIEGEP]] +// X64-NEXT: %[[ISNOELEM:.*]] = icmp eq i64 %2, 0 +// X86-NEXT: %[[ISNOELEM:.*]] = icmp eq i32 %2, 0 +// CHECK-NEXT: br i1 %[[ISNOELEM]], label %vdtor.nocall, label %vdtor.call +// CHECK: vdtor.nocall: +// X64-NEXT: %[[HOWMANYBYTES:.*]] = mul i64 8, %[[HOWMANY]] +// X86-NEXT: %[[HOWMANYBYTES:.*]] = mul i32 4, %[[HOWMANY]] +// X64-NEXT: %[[ADDCOOKIESIZE:.*]] = add i64 %[[HOWMANYBYTES]], 8 +// X86-NEXT: %[[ADDCOOKIESIZE:.*]] = add i32 %[[HOWMANYBYTES]], 4 +// X64-NEXT: call void @"??_V@YAXPEAX_K@Z"(ptr noundef %[[COOKIEGEP]], i64 noundef %[[ADDCOOKIESIZE]]) +// X86-NEXT: call void @"??_V@YAXPAXI@Z"(ptr noundef %[[COOKIEGEP]], i32 noundef %[[ADDCOOKIESIZE]]) +// CHECK-NEXT: br label %delete.end +// CHECK: vdtor.call: +// CHECK-NEXT: %[[VTABLE:.*]] = load ptr, ptr %[[LPTR]] +// CHECK-NEXT: %[[FPGEP:.*]] = getelementptr inbounds ptr, ptr %[[VTABLE]], i64 0 +// CHECK-NEXT: %[[FPLOAD:.*]] = load ptr, ptr %[[FPGEP]] +// X64-NEXT: %[[CALL:.*]] = call noundef ptr %[[FPLOAD]](ptr noundef nonnull align 8 dereferenceable(8) %[[LPTR]], i32 
noundef 3) +// X86-NEXT: %[[CALL:.*]] = call x86_thiscallcc noundef ptr %[[FPLOAD]](ptr noundef nonnull align 4 dereferenceable(4) %[[LPTR]], i32 noundef 3) +// CHECK-NEXT: br label %delete.end +// CHECK: delete.end: +// CHECK-NEXT: ret void + +// Vector dtor definition for Parrot. +// X64-LABEL: define weak dso_local noundef ptr @"??_EParrot@@UEAAPEAXI@Z"( +// X64-SAME: ptr {{.*}} %[[THIS:.*]], i32 {{.*}} %[[IMPLICIT_PARAM:.*]]) unnamed_addr +// X86-LABEL: define weak dso_local x86_thiscallcc noundef ptr @"??_EParrot@@UAEPAXI@Z"( +// X86-SAME: ptr noundef nonnull align 4 dereferenceable(4) %[[THIS:.*]], i32 noundef %[[IMPLICIT_PARAM:.*]]) unnamed_addr +// CHECK: entry: +// CHECK-NEXT: %[[RET:.*]] = alloca ptr +// CHECK-NEXT: %[[IPADDR:.*]] = alloca i32 +// CHECK-NEXT: %[[THISADDR:.*]] = alloca ptr +// CHECK-NEXT: store i32 %[[IMPLICIT_PARAM]], ptr %[[IPADDR]] +// CHECK-NEXT: store ptr %[[THIS]], ptr %[[THISADDR]] +// CHECK-NEXT: %[[LTHIS:.*]] = load ptr, ptr %[[THISADDR]] +// CHECK-NEXT: store ptr %[[LTHIS]], ptr %[[RET]] +// CHECK-NEXT: %[[LIP:.*]] = load i32, ptr %[[IPADDR]] +// CHECK-NEXT: %[[SECONDBIT:.*]] = and i32 %[[LIP]], 2 +// CHECK-NEXT: %[[ISSECONDBITZERO:.*]] = icmp eq i32 %[[SECONDBIT]], 0 +// CHECK-NEXT: br i1 %[[ISSECONDBITZERO:.*]], label %dtor.scalar, label %dtor.vector +// CHECK: dtor.vector: +// X64-NEXT: %[[COOKIEGEP:.*]] = getelementptr inbounds i8, ptr %[[LTHIS]], i64 -8 +// X86-NEXT: %[[COOKIEGEP:.*]] = getelementptr inbounds i8, ptr %[[LTHIS]], i32 -4 +// X64-NEXT: %[[HOWMANY:.*]] = load i64, ptr %[[COOKIEGEP]] +// X86-NEXT: %[[HOWMANY:.*]] = load i32, ptr %[[COOKIEGEP]] +// X64-NEXT: %[[END:.*]] = getelementptr inbounds %struct.Parrot, ptr %[[LTHIS]], i64 %[[HOWMANY]] +// X86-NEXT: %[[END:.*]] = getelementptr inbounds %struct.Parrot, ptr %[[LTHIS]], i32 %[[HOWMANY]] +// CHECK-NEXT: br label %arraydestroy.body +// CHECK: arraydestroy.body: +// CHECK-NEXT: %[[PASTELEM:.*]] = phi ptr [ %delete.end, %dtor.vector ], [ %arraydestroy.element, 
%arraydestroy.body ] +// X64-NEXT: %[[CURELEM:.*]] = getelementptr inbounds %struct.Parrot, ptr %[[PASTELEM]], i64 -1 +// X86-NEXT: %[[CURELEM:.*]] = getelementptr inbounds %struct.Parrot, ptr %[[PASTELEM]], i32 -1 +// X64-NEXT: call void @"??1Parrot@@UEAA@XZ"(ptr noundef nonnull align 8 dereferenceable(8) %[[CURELEM]]) +// X86-NEXT: call x86_thiscallcc void @"??1Parrot@@UAE@XZ"(ptr noundef nonnull align 4 dereferenceable(4) %[[CURELEM]]) +// CHECK-NEXT: %[[DONE:.*]] = icmp eq ptr %[[CURELEM]], %[[LTHIS]] +// CHECK-NEXT: br i1 %[[DONE]], label %arraydestroy.done3, label %arraydestroy.body +// CHECK: arraydestroy.done3: +// CHECK-NEXT: br label %dtor.vector.cont +// CHECK: dtor.vector.cont: +// CHECK-NEXT: %[[FIRSTBIT:.*]] = and i32 %[[LIP]], 1 +// CHECK-NEXT: %[[ISFIRSTBITZERO:.*]] = icmp eq i32 %[[FIRSTBIT]], 0 +// CHECK-NEXT: br i1 %[[ISFIRSTBITZERO]], label %dtor.continue, label %dtor.call_delete_after_array_destroy +// CHECK: dtor.call_delete_after_array_destroy: +// X64-NEXT: call void @"??3@YAXPEAX_K@Z"(ptr noundef %[[COOKIEGEP]], i64 noundef 8) +// X86-NEXT: call void @"??3@YAXPAXI@Z"(ptr noundef %[[COOKIEGEP]], i32 noundef 4) +// CHECK-NEXT: br label %dtor.continue +// CHECK: dtor.scalar: +// X64-NEXT: call void @"??1Parrot@@UEAA@XZ"(ptr noundef nonnull align 8 dereferenceable(8) %[[LTHIS]]) +// X86-NEXT: call x86_thiscallcc void @"??1Parrot@@UAE@XZ"(ptr noundef nonnull align 4 dereferenceable(4) %[[LTHIS]]) +// CHECK-NEXT: %[[FIRSTBIT:.*]] = and i32 %[[LIP]], 1 +// CHECK-NEXT: %[[ISFIRSTBITZERO:.*]] = icmp eq i32 %[[FIRSTBIT]], 0 +// CHECK-NEXT: br i1 %[[ISFIRSTBITZERO]], label %dtor.continue, label %dtor.call_delete +// CHECK: dtor.call_delete: +// X64-NEXT: call void @"??3@YAXPEAX_K@Z"(ptr noundef %[[LTHIS]], i64 noundef 8) +// X86-NEXT: call void @"??3@YAXPAXI@Z"(ptr noundef %[[LTHIS]], i32 noundef 4) +// CHECK-NEXT: br label %dtor.continue +// CHECK: dtor.continue: +// CHECK-NEXT: %[[LOADRET:.*]] = load ptr, ptr %[[RET]] +// CHECK-NEXT: ret ptr 
%[[LOADRET]] + +// X64: define weak dso_local noundef ptr @"??_EJustAWeirdBird@@UEAAPEAXI@Z"( +// X64-SAME: ptr noundef nonnull align 8 dereferenceable(8) %this, i32 noundef %should_call_delete) +// X86: define weak dso_local x86_thiscallcc noundef ptr @"??_EJustAWeirdBird@@UAEPAXI@Z"( +// X86-SAME: ptr noundef nonnull align 4 dereferenceable(4) %this, i32 noundef %should_call_delete) unnamed_addr diff --git a/clang/test/CodeGenCXX/vtable-consteval.cpp b/clang/test/CodeGenCXX/vtable-consteval.cpp index 1454f6fde357d..220143465c574 100644 --- a/clang/test/CodeGenCXX/vtable-consteval.cpp +++ b/clang/test/CodeGenCXX/vtable-consteval.cpp @@ -26,7 +26,7 @@ struct B { B b; // ITANIUM-DAG: @_ZTV1C = {{.*}} constant { [4 x ptr] } {{.*}} null, ptr @_ZTI1C, ptr @_ZN1CD1Ev, ptr @_ZN1CD0Ev -// MSABI-DAG: @[[C_VFTABLE:.*]] = {{.*}} constant { [2 x ptr] } {{.*}} @"??_R4C@@6B@", ptr @"??_GC@@UEAAPEAXI@Z" +// MSABI-DAG: @[[C_VFTABLE:.*]] = {{.*}} constant { [2 x ptr] } {{.*}} @"??_R4C@@6B@", ptr @"??_EC@@UEAAPEAXI@Z" struct C { virtual ~C() = default; virtual consteval C &operator=(const C&) = default; @@ -36,7 +36,7 @@ struct C { C c; // ITANIUM-DAG: @_ZTV1D = {{.*}} constant { [4 x ptr] } {{.*}} null, ptr @_ZTI1D, ptr @_ZN1DD1Ev, ptr @_ZN1DD0Ev -// MSABI-DAG: @[[D_VFTABLE:.*]] = {{.*}} constant { [2 x ptr] } {{.*}} @"??_R4D@@6B@", ptr @"??_GD@@UEAAPEAXI@Z" +// MSABI-DAG: @[[D_VFTABLE:.*]] = {{.*}} constant { [2 x ptr] } {{.*}} @"??_R4D@@6B@", ptr @"??_ED@@UEAAPEAXI@Z" struct D : C {}; // ITANIUM-DAG: @d = {{.*}}global { ptr } { {{.*}} @_ZTV1D, // MSABI-DAG: @"?d@@3UD@@A" = {{.*}}global { ptr } { ptr @"??_7D@@6B@" } diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 18361251dcebc..6416a34898e78 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -390,15 +390,12 @@ // AVXVNNIINT16: "-target-feature" "+avxvnniint16" // NO-AVXVNNIINT16: "-target-feature" "-avxvnniint16" -// RUN: 
%clang --target=i386 -mevex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=EVEX512 %s -// RUN: %clang --target=i386 -mno-evex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-EVEX512 %s -// EVEX512: "-target-feature" "+evex512" -// NO-EVEX512: "-target-feature" "-evex512" - -// RUN: not %clang --target=i386 -march=i386 -mavx10.1 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=UNSUPPORT-AVX10 %s -// RUN: not %clang --target=i386 -march=i386 -mno-avx10.1 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=UNSUPPORT-AVX10 %s -// RUN: %clang --target=i386 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_1_256 %s -// RUN: %clang --target=i386 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_1_512 %s +// RUN: %clang --target=i386 -mevex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=EVEX512,WARN-EVEX512 %s +// RUN: %clang --target=i386 -mno-evex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=NO-EVEX512,WARN-EVEX512 %s +// RUN: %clang --target=i386 -march=i386 -mavx10.1 %s -### -o %t.o 2>&1 -Werror | FileCheck -check-prefix=AVX10_1_512 %s +// RUN: %clang --target=i386 -march=i386 -mno-avx10.1 %s -### -o %t.o 2>&1 -Werror | FileCheck -check-prefix=NO-AVX10_1 %s +// RUN: %clang --target=i386 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_1_256,WARN-AVX10-256 %s +// RUN: %clang --target=i386 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_1_512,WARN-AVX10-512 %s // RUN: %clang --target=i386 -mavx10.1-256 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_1_512 %s // RUN: %clang --target=i386 -mavx10.1-512 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_1_256 %s // RUN: not %clang --target=i386 -march=i386 -mavx10.1-128 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BAD-AVX10 %s @@ -406,15 +403,20 @@ // RUN: not %clang --target=i386 -march=i386 -mavx10.1024-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BAD-AVX10 %s // RUN: %clang --target=i386 
-march=i386 -mavx10.1-256 -mavx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-AVX512 %s // RUN: %clang --target=i386 -march=i386 -mavx10.1-256 -mno-avx512f %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-AVX512 %s -// RUN: %clang --target=i386 -march=i386 -mavx10.1-256 -mevex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-EVEX512 %s -// RUN: %clang --target=i386 -march=i386 -mavx10.1-256 -mno-evex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10-EVEX512 %s -// RUN: %clang --target=i386 -mavx10.2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_512 %s -// RUN: %clang --target=i386 -mno-avx10.2 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVX10_2 %s -// RUN: %clang --target=i386 -mavx10.2-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_256 %s -// RUN: %clang --target=i386 -mavx10.2-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVX10_2_512 %s +// RUN: %clang --target=i386 -march=i386 -mavx10.1-256 -mevex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10-EVEX512,WARN-EVEX512 %s +// RUN: %clang --target=i386 -march=i386 -mavx10.1-256 -mno-evex512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10-EVEX512,WARN-EVEX512 %s +// RUN: %clang --target=i386 -mavx10.2 %s -### -o %t.o 2>&1 -Werror | FileCheck -check-prefix=AVX10_2_512 %s +// RUN: %clang --target=i386 -mno-avx10.2 %s -### -o %t.o 2>&1 -Werror | FileCheck -check-prefix=NO-AVX10_2 %s +// RUN: %clang --target=i386 -mavx10.2-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_256,WARN-AVX10-256 %s +// RUN: %clang --target=i386 -mavx10.2-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_512,WARN-AVX10-512 %s // RUN: %clang --target=i386 -mavx10.2-256 -mavx10.1-512 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_256,AVX10_1_512 %s // RUN: %clang --target=i386 -mavx10.2-512 -mavx10.1-256 %s -### -o %t.o 2>&1 | FileCheck -check-prefixes=AVX10_2_512,AVX10_1_256 %s -// UNSUPPORT-AVX10: error: unsupported option 
'-m{{.*}}avx10.1' for target 'i386' +// WARN-EVEX512: warning: argument '{{.*}}evex512' is deprecated, because AVX10/256 is not supported and will be removed [-Wdeprecated] +// WARN-AVX10-256: warning: argument 'avx10.{{.*}}-256' is deprecated, because AVX10/256 is not supported and will be removed [-Wdeprecated] +// WARN-AVX10-512: warning: argument 'avx10.{{.*}}-512' is deprecated, use 'avx10.{{.*}}' instead [-Wdeprecated] +// EVEX512: "-target-feature" "+evex512" +// NO-EVEX512: "-target-feature" "-evex512" +// NO-AVX10_1: "-target-feature" "-avx10.1-256" // NO-AVX10_2: "-target-feature" "-avx10.2-256" // AVX10_2_256: "-target-feature" "+avx10.2-256" // AVX10_2_512: "-target-feature" "+avx10.2-512" diff --git a/clang/test/Modules/vtable-windows.cppm b/clang/test/Modules/vtable-windows.cppm index dbde24c8a9bdd..e45e32d6b4d60 100644 --- a/clang/test/Modules/vtable-windows.cppm +++ b/clang/test/Modules/vtable-windows.cppm @@ -23,4 +23,4 @@ void test() { // Check that the virtual table is an unnamed_addr constant in comdat that can // be merged with the virtual table with other TUs. 
-// CHECK: unnamed_addr constant {{.*}}[ptr @"??_R4Fruit@@6B@", ptr @"??_GFruit@@UAEPAXI@Z", ptr @"?eval@Fruit@@UAEXXZ"{{.*}}comdat($"??_7Fruit@@6B@") +// CHECK: unnamed_addr constant {{.*}}[ptr @"??_R4Fruit@@6B@", ptr @"??_EFruit@@UAEPAXI@Z", ptr @"?eval@Fruit@@UAEXXZ"{{.*}}comdat($"??_7Fruit@@6B@") diff --git a/clang/test/Profile/cxx-abc-deleting-dtor.cpp b/clang/test/Profile/cxx-abc-deleting-dtor.cpp index c65a8e8013c35..7c2a5bbc93af3 100644 --- a/clang/test/Profile/cxx-abc-deleting-dtor.cpp +++ b/clang/test/Profile/cxx-abc-deleting-dtor.cpp @@ -24,16 +24,15 @@ DerivedABC *useABCVTable() { return new DerivedABC(); } // MSVC: @"__profn_??1ABC@@{{.*}}" = // MSVC-NOT: @"__profn_??_G{{.*}}" = -// MSVC-LABEL: define linkonce_odr dso_local noundef ptr @"??_GDerivedABC@@UEAAPEAXI@Z"(ptr {{[^,]*}} %this, {{.*}}) -// MSVC-NOT: call void @llvm.instrprof.increment({{.*}}) -// MSVC: call void @"??1DerivedABC@@UEAA@XZ"({{.*}}) -// MSVC: ret void - // MSVC-LABEL: define linkonce_odr dso_local noundef ptr @"??_GABC@@UEAAPEAXI@Z"(ptr {{[^,]*}} %this, {{.*}}) // MSVC-NOT: call void @llvm.instrprof.increment({{.*}}) // MSVC: call void @llvm.trap() // MSVC-NEXT: unreachable +// MSVC-LABEL: define linkonce_odr dso_local noundef ptr @"??_GDerivedABC@@UEAAPEAXI@Z"(ptr {{[^,]*}} %this, {{.*}}) +// MSVC-NOT: call void @llvm.instrprof.increment({{.*}}) +// MSVC: call void @"??1DerivedABC@@UEAA@XZ"({{.*}}) + // MSVC-LABEL: define linkonce_odr dso_local void @"??1DerivedABC@@UEAA@XZ"({{.*}}) // MSVC: call void @llvm.instrprof.increment({{.*}}) // MSVC: call void @"??1ABC@@UEAA@XZ"({{.*}}) diff --git a/clang/test/SemaCXX/MicrosoftExtensions.cpp b/clang/test/SemaCXX/MicrosoftExtensions.cpp index 7454a01158f6b..9f6939c1681c9 100644 --- a/clang/test/SemaCXX/MicrosoftExtensions.cpp +++ b/clang/test/SemaCXX/MicrosoftExtensions.cpp @@ -470,6 +470,7 @@ struct InheritFromSealed : SealedType {}; class SealedDestructor { // expected-note {{mark 'SealedDestructor' as 'sealed' to silence this 
warning}} // expected-warning@+1 {{'sealed' keyword is a Microsoft extension}} virtual ~SealedDestructor() sealed; // expected-warning {{class with destructor marked 'sealed' cannot be inherited from}} + // expected-warning@-1 {{virtual method '~SealedDestructor' is inside a 'final' class}} }; // expected-warning@+1 {{'abstract' keyword is a Microsoft extension}} diff --git a/clang/test/SemaCXX/ms_struct.cpp b/clang/test/SemaCXX/ms_struct.cpp index 995e424d1f876..409350f2606a9 100644 --- a/clang/test/SemaCXX/ms_struct.cpp +++ b/clang/test/SemaCXX/ms_struct.cpp @@ -25,7 +25,7 @@ struct B : public A { static_assert(__builtin_offsetof(B, d) == 12, "We can't allocate the bitfield into the padding under ms_struct"); -// expected-warning@-2 {{offset of on non-standard-layout type 'B'}} +// expected-warning@-2 {{'offsetof' on non-standard-layout type 'B'}} struct C { #ifdef TEST_FOR_ERROR @@ -39,5 +39,5 @@ struct C { static_assert(__builtin_offsetof(C, n) == 8, "long long field in ms_struct should be 8-byte aligned"); -// expected-warning@-2 {{offset of on non-standard-layout type 'C'}} +// expected-warning@-2 {{'offsetof' on non-standard-layout type 'C'}} diff --git a/clang/test/SemaCXX/offsetof-0x.cpp b/clang/test/SemaCXX/offsetof-0x.cpp index a3fe2fbbad72d..d8d417b6885c4 100644 --- a/clang/test/SemaCXX/offsetof-0x.cpp +++ b/clang/test/SemaCXX/offsetof-0x.cpp @@ -11,7 +11,7 @@ struct P { }; void f() { - int i = __builtin_offsetof(P, fieldThatPointsToANonPODType.m); // expected-warning{{offset of on non-standard-layout type 'P'}} + int i = __builtin_offsetof(P, fieldThatPointsToANonPODType.m); // expected-warning{{'offsetof' on non-standard-layout type 'P'}} } struct StandardLayout { diff --git a/clang/test/SemaCXX/offsetof.cpp b/clang/test/SemaCXX/offsetof.cpp index 1722b91fafc86..367a907f03775 100644 --- a/clang/test/SemaCXX/offsetof.cpp +++ b/clang/test/SemaCXX/offsetof.cpp @@ -11,12 +11,12 @@ struct P { }; void f() { - int i = __builtin_offsetof(P, 
fieldThatPointsToANonPODType.m); // expected-warning{{offset of on non-POD type 'P'}} + int i = __builtin_offsetof(P, fieldThatPointsToANonPODType.m); // expected-warning{{'offsetof' on non-POD type 'P'}} } struct Base { int x; }; struct Derived : Base { int y; }; -int o = __builtin_offsetof(Derived, x); // expected-warning{{offset of on non-POD type}} +int o = __builtin_offsetof(Derived, x); // expected-warning{{'offsetof' on non-POD type}} const int o2 = sizeof(__builtin_offsetof(Derived, x)); @@ -51,9 +51,9 @@ struct Derived2 : public Base1, public Base2 { int z; }; -int derived1[__builtin_offsetof(Derived2, x) == 0? 1 : -1]; // expected-warning{{offset of on non-POD type 'Derived2'}} -int derived2[__builtin_offsetof(Derived2, y) == 4? 1 : -1]; // expected-warning{{offset of on non-POD type 'Derived2'}} -int derived3[__builtin_offsetof(Derived2, z) == 8? 1 : -1]; // expected-warning{{offset of on non-POD type 'Derived2'}} +int derived1[__builtin_offsetof(Derived2, x) == 0? 1 : -1]; // expected-warning{{'offsetof' on non-POD type 'Derived2'}} +int derived2[__builtin_offsetof(Derived2, y) == 4? 1 : -1]; // expected-warning{{'offsetof' on non-POD type 'Derived2'}} +int derived3[__builtin_offsetof(Derived2, z) == 8? 1 : -1]; // expected-warning{{'offsetof' on non-POD type 'Derived2'}} // offsetof referring to anonymous struct in base. // PR7769 @@ -66,7 +66,7 @@ struct foo { struct bar : public foo { }; -int anonstruct[__builtin_offsetof(bar, x) == 0 ? 1 : -1]; // expected-warning{{offset of on non-POD type 'bar'}} +int anonstruct[__builtin_offsetof(bar, x) == 0 ? 
1 : -1]; // expected-warning{{'offsetof' on non-POD type 'bar'}} struct LtoRCheck { @@ -81,7 +81,7 @@ struct Base { int Field; }; struct Derived : virtual Base { - void Fun() { (void)__builtin_offsetof(Derived, Field); } // expected-warning {{offset of on non-POD type}} \ + void Fun() { (void)__builtin_offsetof(Derived, Field); } // expected-warning {{'offsetof' on non-POD type}} \ expected-error {{invalid application of 'offsetof' to a field of a virtual base}} }; } diff --git a/clang/test/SemaCXX/sugar-common-types.cpp b/clang/test/SemaCXX/sugar-common-types.cpp index a21032517b2ba..d58f6cdd900fc 100644 --- a/clang/test/SemaCXX/sugar-common-types.cpp +++ b/clang/test/SemaCXX/sugar-common-types.cpp @@ -186,3 +186,19 @@ namespace arrays { // expected-error@-1 {{lvalue of type 'const volatile volatile B1[1]' (aka 'const volatile volatile int[1]')}} } // namespace balanced_qualifiers } // namespace arrays + +namespace member_pointers { + template struct W { + X1 a; + Y1 b; + }; + struct W1 : W {}; + struct W2 : W {}; + + N t1 = 0 ? &W::a : &W::b; + // expected-error@-1 {{rvalue of type 'B1 W::*'}} + + // FIXME: adjusted MemberPointer does not preserve qualifier + N t3 = 0 ? 
&W1::a : &W2::b; + // expected-error@-1 {{rvalue of type 'B1 W::*'}} +} // namespace member_pointers diff --git a/clang/test/SemaCXX/warn-final-dtor-non-final-class.cpp b/clang/test/SemaCXX/warn-final-dtor-non-final-class.cpp index a96aa4436e818..c9c8c11e1d7ff 100644 --- a/clang/test/SemaCXX/warn-final-dtor-non-final-class.cpp +++ b/clang/test/SemaCXX/warn-final-dtor-non-final-class.cpp @@ -1,5 +1,6 @@ -// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s -Wfinal-dtor-non-final-class -// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -Wfinal-dtor-non-final-class -fdiagnostics-parseable-fixits 2>&1 | FileCheck %s +// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s -Wfinal-dtor-non-final-class -Wno-unnecessary-virtual-specifier +// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -Wfinal-dtor-non-final-class -Wno-unnecessary-virtual-specifier \ +// RUN: -fdiagnostics-parseable-fixits 2>&1 | FileCheck %s class A { ~A(); diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 52dec2013a24f..52d922abbcaec 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1099,7 +1099,7 @@ Expected getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind, // If the file gets extracted we update the table with the new symbols. if (ShouldExtract) - Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); + Syms.insert_range(TmpSyms); return ShouldExtract; } @@ -1154,7 +1154,7 @@ Expected getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind, // If the file gets extracted we update the table with the new symbols. 
if (ShouldExtract) - Syms.insert(std::begin(TmpSyms), std::end(TmpSyms)); + Syms.insert_range(TmpSyms); return ShouldExtract; } diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 287191d04d885..2b08b794792e9 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -520,6 +520,14 @@ TEST(ConfigParseTest, ParsesConfiguration) { CHECK_PARSE("EmptyLineBeforeAccessModifier: Always", EmptyLineBeforeAccessModifier, FormatStyle::ELBAMS_Always); + Style.EnumTrailingComma = FormatStyle::ETC_Insert; + CHECK_PARSE("EnumTrailingComma: Leave", EnumTrailingComma, + FormatStyle::ETC_Leave); + CHECK_PARSE("EnumTrailingComma: Insert", EnumTrailingComma, + FormatStyle::ETC_Insert); + CHECK_PARSE("EnumTrailingComma: Remove", EnumTrailingComma, + FormatStyle::ETC_Remove); + Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; CHECK_PARSE("AlignAfterOpenBracket: Align", AlignAfterOpenBracket, FormatStyle::BAS_Align); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 0b90bd360b758..4dfa135120605 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -27902,6 +27902,38 @@ TEST_F(FormatTest, RemoveSemicolon) { verifyFormat("STRUCT(T, B) { int i; };", Style); } +TEST_F(FormatTest, EnumTrailingComma) { + constexpr StringRef Code("enum : int { /**/ };\n" + "enum {\n" + " a,\n" + " b,\n" + " c, //\n" + "};\n" + "enum Color { red, green, blue /**/ };"); + verifyFormat(Code); + + auto Style = getLLVMStyle(); + Style.EnumTrailingComma = FormatStyle::ETC_Insert; + verifyFormat("enum : int { /**/ };\n" + "enum {\n" + " a,\n" + " b,\n" + " c, //\n" + "};\n" + "enum Color { red, green, blue, /**/ };", + Code, Style); + + Style.EnumTrailingComma = FormatStyle::ETC_Remove; + verifyFormat("enum : int { /**/ };\n" + "enum {\n" + " a,\n" + " b,\n" + " c //\n" + "};\n" + "enum Color { red, green, blue /**/ 
};", + Code, Style); +} + TEST_F(FormatTest, BreakAfterAttributes) { constexpr StringRef Code("[[maybe_unused]] const int i;\n" "[[foo([[]])]] [[maybe_unused]]\n" diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index 78c9f887a159b..3dae67fbcdfcb 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -828,12 +828,18 @@ TEST_F(FormatTestJS, AsyncFunctions) { "} "); // clang-format must not insert breaks between async and function, otherwise // automatic semicolon insertion may trigger (in particular in a class body). + auto Style = getGoogleJSStyleWithColumns(10); verifyFormat("async function\n" "hello(\n" " myparamnameiswaytooloooong) {\n" "}", "async function hello(myparamnameiswaytooloooong) {}", - getGoogleJSStyleWithColumns(10)); + Style); + verifyFormat("async function\n" + "union(\n" + " myparamnameiswaytooloooong) {\n" + "}", + Style); verifyFormat("class C {\n" " async hello(\n" " myparamnameiswaytooloooong) {\n" @@ -841,7 +847,7 @@ TEST_F(FormatTestJS, AsyncFunctions) { "}", "class C {\n" " async hello(myparamnameiswaytooloooong) {} }", - getGoogleJSStyleWithColumns(10)); + Style); verifyFormat("async function* f() {\n" " yield fetch(x);\n" "}"); @@ -1338,15 +1344,16 @@ TEST_F(FormatTestJS, WrapRespectsAutomaticSemicolonInsertion) { // The following statements must not wrap, as otherwise the program meaning // would change due to automatic semicolon insertion. // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1. - verifyFormat("return aaaaa;", getGoogleJSStyleWithColumns(10)); - verifyFormat("yield aaaaa;", getGoogleJSStyleWithColumns(10)); - verifyFormat("return /* hello! */ aaaaa;", getGoogleJSStyleWithColumns(10)); - verifyFormat("continue aaaaa;", getGoogleJSStyleWithColumns(10)); - verifyFormat("continue /* hello! 
*/ aaaaa;", getGoogleJSStyleWithColumns(10)); - verifyFormat("break aaaaa;", getGoogleJSStyleWithColumns(10)); - verifyFormat("throw aaaaa;", getGoogleJSStyleWithColumns(10)); - verifyFormat("aaaaaaaaa++;", getGoogleJSStyleWithColumns(10)); - verifyFormat("aaaaaaaaa--;", getGoogleJSStyleWithColumns(10)); + auto Style = getGoogleJSStyleWithColumns(10); + verifyFormat("return aaaaa;", Style); + verifyFormat("yield aaaaa;", Style); + verifyFormat("return /* hello! */ aaaaa;", Style); + verifyFormat("continue aaaaa;", Style); + verifyFormat("continue /* hello! */ aaaaa;", Style); + verifyFormat("break aaaaa;", Style); + verifyFormat("throw aaaaa;", Style); + verifyFormat("aaaaaaaaa++;", Style); + verifyFormat("aaaaaaaaa--;", Style); verifyFormat("return [\n" " aaa\n" "];", @@ -1366,12 +1373,13 @@ TEST_F(FormatTestJS, WrapRespectsAutomaticSemicolonInsertion) { // Ideally the foo() bit should be indented relative to the async function(). verifyFormat("async function\n" "foo() {}", - getGoogleJSStyleWithColumns(10)); - verifyFormat("await theReckoning;", getGoogleJSStyleWithColumns(10)); - verifyFormat("some['a']['b']", getGoogleJSStyleWithColumns(10)); + Style); + verifyFormat("await theReckoning;", Style); + verifyFormat("some['a']['b']", Style); + verifyFormat("union['a']['b']", Style); verifyFormat("x = (a['a']\n" " ['b']);", - getGoogleJSStyleWithColumns(10)); + Style); verifyFormat("function f() {\n" " return foo.bar(\n" " (param): param is {\n" @@ -2500,6 +2508,10 @@ TEST_F(FormatTestJS, NonNullAssertionOperator) { TEST_F(FormatTestJS, CppKeywords) { // Make sure we don't mess stuff up because of C++ keywords. verifyFormat("return operator && (aa);"); + verifyFormat("enum operator {\n" + " A = 1,\n" + " B\n" + "}"); // .. or QT ones. verifyFormat("const slots: Slot[];"); // use the "!" 
assertion operator to validate that clang-format understands diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp index 33998bc7ff858..e01c1d6d7e684 100644 --- a/clang/unittests/Format/FormatTestJava.cpp +++ b/clang/unittests/Format/FormatTestJava.cpp @@ -158,6 +158,8 @@ TEST_F(FormatTestJava, AnonymousClasses) { TEST_F(FormatTestJava, EnumDeclarations) { verifyFormat("enum SomeThing { ABC, CDE }"); + // A C++ keyword should not mess things up. + verifyFormat("enum union { ABC, CDE }"); verifyFormat("enum SomeThing {\n" " ABC,\n" " CDE,\n" diff --git a/clang/unittests/Format/FormatTestVerilog.cpp b/clang/unittests/Format/FormatTestVerilog.cpp index e4a14ff754d1a..5c50ae6fcfac8 100644 --- a/clang/unittests/Format/FormatTestVerilog.cpp +++ b/clang/unittests/Format/FormatTestVerilog.cpp @@ -160,6 +160,8 @@ TEST_F(FormatTestVerilog, Block) { // Test that 'disable fork' and 'rand join' don't get mistaken as blocks. verifyFormat("disable fork;\n" "x = x;"); + verifyFormat("wait fork;\n" + "x = x;"); verifyFormat("rand join x x;\n" "x = x;"); // The begin keyword should not be indented if it is too long to fit on the diff --git a/libc/config/config.json b/libc/config/config.json index d738aade74427..b2688f1b29309 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -89,6 +89,10 @@ "LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT": { "value": 100, "doc": "Default number of spins before blocking if a rwlock is in contention (default to 100)." + }, + "LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP": { + "value": false, + "doc": "Enable the `_malloc_thread_cleanup` weak symbol. When defined, this function is called after `__cxa` and pthread-specific dtors. On the main thread, this will be called after `atexit` functions and `.fini` dtors, right before TLS tearing down. This function can be overridden by allocators to perform cleanup. 
Allocators can use this symbol to avoid registering thread dtors using potentially reentrant routines." } }, "math": { diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 648e3d5ac5281..09c8c18c04a59 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -706,6 +706,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.fromfpf16 libc.src.math.fromfpxf16 libc.src.math.getpayloadf16 + libc.src.math.hypotf16 libc.src.math.ilogbf16 libc.src.math.iscanonicalf16 libc.src.math.issignalingf16 diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index dee9a63101eb9..182d373c075f6 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -47,6 +47,7 @@ to learn about the defaults for your platform and target. - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large table for better printf long double performance. - ``LIBC_CONF_PRINTF_RUNTIME_DISPATCH``: Use dynamic dispatch for the output mechanism to reduce code size. * **"pthread" options** + - ``LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP``: Enable the `_malloc_thread_cleanup` weak symbol. When defined, this function is called after `__cxa` and pthread-specific dtors. On the main thread, this will be called after `atexit` functions and `.fini` dtors, right before TLS tearing down. This function can be overridden by allocators to perform cleanup. Allocators can use this symbol to avoid registering thread dtors using potentially reentrant routines. - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a mutex is in contention (default to 100). - ``LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a rwlock is in contention (default to 100). - ``LIBC_CONF_TIMEOUT_ENSURE_MONOTONICITY``: Automatically adjust timeout to CLOCK_MONOTONIC (default to true). 
POSIX API may require CLOCK_REALTIME, which can be unstable and leading to unexpected behavior. This option will convert the real-time timestamp to monotonic timestamp relative to the time of call. diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst index 23d010e2ab5d7..df2650065f882 100644 --- a/libc/docs/headers/math/index.rst +++ b/libc/docs/headers/math/index.rst @@ -305,7 +305,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fsqrt | N/A | |check| | |check| | N/A | |check|\* | 7.12.14.6 | F.10.11 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| hypot | |check| | |check| | | | | 7.12.7.4 | F.10.4.4 | +| hypot | |check| | |check| | | |check| | | 7.12.7.4 | F.10.4.4 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | lgamma | | | | | | 7.12.8.3 | F.10.5.3 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/include/math.yaml b/libc/include/math.yaml index 3a06bcfc4f43e..133f9a6c034ec 100644 --- a/libc/include/math.yaml +++ b/libc/include/math.yaml @@ -1395,6 +1395,14 @@ functions: arguments: - type: float - type: float + - name: hypotf16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: _Float16 + - type: _Float16 + guard: LIBC_TYPES_HAS_FLOAT16 - name: ilogb standards: - stdc diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h index 6aa808446d6d9..94da259cd42f0 100644 --- a/libc/src/__support/FPUtil/Hypot.h +++ 
b/libc/src/__support/FPUtil/Hypot.h @@ -30,7 +30,7 @@ LIBC_INLINE T find_leading_one(T mant, int &shift_length) { if (mant > 0) { shift_length = (sizeof(mant) * 8) - 1 - cpp::countl_zero(mant); } - return T(1) << shift_length; + return static_cast((T(1) << shift_length)); } } // namespace internal @@ -207,8 +207,10 @@ LIBC_INLINE T hypot(T x, T y) { for (StorageType current_bit = leading_one >> 1; current_bit; current_bit >>= 1) { - r = (r << 1) + ((tail_bits & current_bit) ? 1 : 0); - StorageType tmp = (y_new << 1) + current_bit; // 2*y_new(n - 1) + 2^(-n) + r = static_cast((r << 1)) + + ((tail_bits & current_bit) ? 1 : 0); + StorageType tmp = static_cast((y_new << 1)) + + current_bit; // 2*y_new(n - 1) + 2^(-n) if (r >= tmp) { r -= tmp; y_new += current_bit; diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h index 126f3852137b7..7578bb42b18f1 100644 --- a/libc/src/__support/FPUtil/cast.h +++ b/libc/src/__support/FPUtil/cast.h @@ -18,6 +18,9 @@ namespace LIBC_NAMESPACE::fputil { +// TODO: Add optimization for known good targets with fast +// float to float16 conversion: +// https://github.com/llvm/llvm-project/issues/133517 template LIBC_INLINE constexpr cpp::enable_if_t && cpp::is_floating_point_v, diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index 364e7e2b90585..3e7c16afe0f6e 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -71,6 +71,12 @@ add_header_library( libc.src.__support.threads.mutex_common ) +if (LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP) + set(malloc_cleanup_flags -DLIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP) +else() + set(malloc_cleanup_flags) +endif() + add_object_library( thread SRCS @@ -89,6 +95,7 @@ add_object_library( libc.src.__support.threads.thread_common COMPILE_OPTIONS ${libc_opt_high_flag} + ${malloc_cleanup_flags} -fno-omit-frame-pointer # This allows us to sniff out the thread args 
from # the new thread's stack reliably. -Wno-frame-address # Yes, calling __builtin_return_address with a diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp index c531d74c53355..2d6d4e517064d 100644 --- a/libc/src/__support/threads/linux/thread.cpp +++ b/libc/src/__support/threads/linux/thread.cpp @@ -482,6 +482,10 @@ int Thread::get_name(cpp::StringStream &name) const { return 0; } +#ifdef LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP +extern "C" [[gnu::weak]] void _malloc_thread_cleanup(); +#endif // LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP + void thread_exit(ThreadReturnValue retval, ThreadStyle style) { auto attrib = self.attrib; @@ -494,6 +498,11 @@ void thread_exit(ThreadReturnValue retval, ThreadStyle style) { // different thread. The destructors of thread local and TSS objects should // be called by the thread which owns them. internal::call_atexit_callbacks(attrib); +#ifdef LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP + // call _malloc_thread_cleanup after the atexit callbacks + if (_malloc_thread_cleanup) + _malloc_thread_cleanup(); +#endif // LIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP uint32_t joinable_state = uint32_t(DetachState::JOINABLE); if (!attrib->detach_state.compare_exchange_strong( diff --git a/libc/src/__support/threads/thread.cpp b/libc/src/__support/threads/thread.cpp index 6f6b75be5766d..c7135596622c6 100644 --- a/libc/src/__support/threads/thread.cpp +++ b/libc/src/__support/threads/thread.cpp @@ -154,6 +154,9 @@ ThreadAtExitCallbackMgr *get_thread_atexit_callback_mgr() { } void call_atexit_callbacks(ThreadAttributes *attrib) { + if (attrib->dtors_called) + return; + attrib->dtors_called = true; attrib->atexit_callback_mgr->call(); for (size_t i = 0; i < TSS_KEY_COUNT; ++i) { TSSValueUnit &unit = tss_values[i]; @@ -163,6 +166,8 @@ void call_atexit_callbacks(ThreadAttributes *attrib) { } } +extern "C" void __cxa_thread_finalize() { call_atexit_callbacks(self.attrib); } + } // namespace internal 
cpp::optional new_tss_key(TSSDtor *dtor) { diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h index f2b1f6bbb253d..f7710fde2c70d 100644 --- a/libc/src/__support/threads/thread.h +++ b/libc/src/__support/threads/thread.h @@ -109,12 +109,14 @@ struct alignas(STACK_ALIGNMENT) ThreadAttributes { ThreadReturnValue retval; ThreadAtExitCallbackMgr *atexit_callback_mgr; void *platform_data; + bool dtors_called; - constexpr ThreadAttributes() + LIBC_INLINE constexpr ThreadAttributes() : detach_state(uint32_t(DetachState::DETACHED)), stack(nullptr), stacksize(0), guardsize(0), tls(0), tls_size(0), owned_stack(false), tid(-1), style(ThreadStyle::POSIX), retval(), - atexit_callback_mgr(nullptr), platform_data(nullptr) {} + atexit_callback_mgr(nullptr), platform_data(nullptr), + dtors_called(false) {} }; using TSSDtor = void(void *); diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 9fec978ece2bd..88fb73f856e82 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -313,6 +313,7 @@ add_math_entrypoint_object(getpayloadf128) add_math_entrypoint_object(hypot) add_math_entrypoint_object(hypotf) +add_math_entrypoint_object(hypotf16) add_math_entrypoint_object(ilogb) add_math_entrypoint_object(ilogbf) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index d3454803df377..de74729465ee7 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3105,6 +3105,22 @@ add_entrypoint_object( libc.src.__support.macros.optimization ) +add_entrypoint_object( + hypotf16 + SRCS + hypotf16.cpp + HDRS + ../hypotf16.h + DEPENDS + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.sqrt + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.types +) + add_entrypoint_object( fdim SRCS diff 
--git a/libc/src/math/generic/hypotf16.cpp b/libc/src/math/generic/hypotf16.cpp new file mode 100644 index 0000000000000..8f80986204b27 --- /dev/null +++ b/libc/src/math/generic/hypotf16.cpp @@ -0,0 +1,89 @@ +//===-- Implementation of hypotf16 function -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/hypotf16.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/sqrt.h" +#include "src/__support/common.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +// For targets where conversion from float to float16 has to be +// emulated, fputil::hypot is faster +LLVM_LIBC_FUNCTION(float16, hypotf16, (float16 x, float16 y)) { + using FloatBits = fputil::FPBits; + using FPBits = fputil::FPBits; + + FPBits x_abs = FPBits(x).abs(); + FPBits y_abs = FPBits(y).abs(); + + bool x_abs_larger = x_abs.uintval() >= y_abs.uintval(); + + FPBits a_bits = x_abs_larger ? x_abs : y_abs; + FPBits b_bits = x_abs_larger ? y_abs : x_abs; + + uint16_t a_u = a_bits.uintval(); + uint16_t b_u = b_bits.uintval(); + + // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()` + // generates extra exponent bit masking instructions on x86-64. 
+ if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) { + // x or y is inf or nan + if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + if (a_bits.is_inf() || b_bits.is_inf()) + return FPBits::inf().get_val(); + return a_bits.get_val(); + } + + if (LIBC_UNLIKELY(a_u - b_u >= + static_cast((FPBits::FRACTION_LEN + 2) + << FPBits::FRACTION_LEN))) + return x_abs.get_val() + y_abs.get_val(); + + float af = fputil::cast(a_bits.get_val()); + float bf = fputil::cast(b_bits.get_val()); + + // These squares are exact. + float a_sq = af * af; + float sum_sq = fputil::multiply_add(bf, bf, a_sq); + + FloatBits result(fputil::sqrt(sum_sq)); + uint32_t r_u = result.uintval(); + + // If any of the sticky bits of the result are non-zero, except the LSB, then + // the rounded result is correct. + if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0FFE) == 0)) { + float r_d = result.get_val(); + + // Perform rounding correction. + float sum_sq_lo = fputil::multiply_add(bf, bf, a_sq - sum_sq); + float err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq); + + if (err > 0) { + r_u |= 1; + } else if ((err < 0) && (r_u & 1) == 0) { + r_u -= 1; + } else if ((r_u & 0x0000'1FFF) == 0) { + // The rounded result is exact. + fputil::clear_except_if_required(FE_INEXACT); + } + return fputil::cast(FloatBits(r_u).get_val()); + } + + return fputil::cast(result.get_val()); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/hypotf16.h b/libc/src/math/hypotf16.h new file mode 100644 index 0000000000000..2d37c61b4ee7b --- /dev/null +++ b/libc/src/math/hypotf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for hypotf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_HYPOTF16_H +#define LLVM_LIBC_SRC_MATH_HYPOTF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 hypotf16(float16 x, float16 y); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_HYPOTF16_H diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index 74ae864f72e23..7dd0c969cf9b2 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -589,6 +589,12 @@ add_header_library( ) endif() +if (LIBC_CONF_ENABLE_MALLOC_THREAD_CLEANUP) + set(malloc_cleanup_flags -DLIBC_COPT_ENABLE_MALLOC_THREAD_CLEANUP) +else() + set(malloc_cleanup_flags) +endif() + add_entrypoint_object( atexit SRCS @@ -599,6 +605,8 @@ add_entrypoint_object( 20 # For constinit DEPENDS .exit_handler + COMPILE_OPTIONS + ${malloc_cleanup_flags} ) add_entrypoint_object( diff --git a/libc/src/stdlib/atexit.cpp b/libc/src/stdlib/atexit.cpp index 799aad136bda5..979ed1f29b642 100644 --- a/libc/src/stdlib/atexit.cpp +++ b/libc/src/stdlib/atexit.cpp @@ -18,6 +18,10 @@ constinit ExitCallbackList atexit_callbacks; Mutex handler_list_mtx(false, false, false, false); [[gnu::weak]] extern void teardown_main_tls(); +namespace internal { +[[gnu::weak]] extern void call_atexit_callbacks(); +} + extern "C" { int __cxa_atexit(AtExitCallback *callback, void *payload, void *) { diff --git a/libc/src/stdlib/exit.cpp b/libc/src/stdlib/exit.cpp index 28a6f8a63c0c6..097a52339e5e8 100644 --- a/libc/src/stdlib/exit.cpp +++ b/libc/src/stdlib/exit.cpp @@ -14,8 +14,12 @@ namespace LIBC_NAMESPACE_DECL { extern "C" void __cxa_finalize(void *); +extern "C" [[gnu::weak]] void __cxa_thread_finalize(); +// TODO: use recursive mutex to protect this routine. 
[[noreturn]] LLVM_LIBC_FUNCTION(void, exit, (int status)) { + if (__cxa_thread_finalize) + __cxa_thread_finalize(); __cxa_finalize(nullptr); internal::exit(status); } diff --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt index 5a12d28ada3fd..40e96681b1207 100644 --- a/libc/test/integration/src/__support/threads/CMakeLists.txt +++ b/libc/test/integration/src/__support/threads/CMakeLists.txt @@ -25,3 +25,24 @@ add_integration_test( DEPENDS libc.src.__support.threads.thread ) + +add_integration_test( + main_exit_test + SUITE + libc-support-threads-integration-tests + SRCS + main_exit_test.cpp + DEPENDS + libc.src.__support.threads.thread +) + +add_integration_test( + double_exit_test + SUITE + libc-support-threads-integration-tests + SRCS + double_exit_test.cpp + DEPENDS + libc.src.__support.threads.thread + libc.src.stdlib.exit +) diff --git a/libc/test/integration/src/__support/threads/double_exit_test.cpp b/libc/test/integration/src/__support/threads/double_exit_test.cpp new file mode 100644 index 0000000000000..e4a163644a970 --- /dev/null +++ b/libc/test/integration/src/__support/threads/double_exit_test.cpp @@ -0,0 +1,23 @@ +//===-- Test handling of thread local data --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/thread.h" +#include "src/stdlib/exit.h" +#include "test/IntegrationTest/test.h" + +extern "C" { +[[gnu::weak]] +void *__dso_handle = nullptr; +int __cxa_thread_atexit_impl(void (*func)(void *), void *arg, void *dso); +} + +TEST_MAIN() { + __cxa_thread_atexit_impl([](void *) { LIBC_NAMESPACE::exit(0); }, nullptr, + __dso_handle); + return 0; +} diff --git a/libc/test/integration/src/__support/threads/main_exit_test.cpp b/libc/test/integration/src/__support/threads/main_exit_test.cpp new file mode 100644 index 0000000000000..c90e4e569cfba --- /dev/null +++ b/libc/test/integration/src/__support/threads/main_exit_test.cpp @@ -0,0 +1,30 @@ +//===-- Test handling of thread local data --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/threads/thread.h" +#include "test/IntegrationTest/test.h" + +bool called = false; + +extern "C" { +[[gnu::weak]] +void *__dso_handle = nullptr; +int __cxa_thread_atexit_impl(void (*func)(void *), void *arg, void *dso); +} + +[[gnu::destructor]] +void destructor() { + if (!called) + __builtin_trap(); +} + +TEST_MAIN() { + __cxa_thread_atexit_impl([](void *) { called = true; }, nullptr, + __dso_handle); + return 0; +} diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 9a73f9fe07597..514c01834c1a4 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1701,6 +1701,17 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + hypotf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + hypotf16_test.cpp + DEPENDS + libc.src.math.hypotf16 +) + add_fp_unittest( nextafter_test SUITE diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h index fd0c1b394b8f7..dc73581e67ff0 100644 --- a/libc/test/src/math/HypotTest.h +++ b/libc/test/src/math/HypotTest.h @@ -73,7 +73,7 @@ class HypotTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { constexpr StorageType COUNT = 10'001; for (unsigned scale = 0; scale < 4; ++scale) { StorageType max_value = MAX_SUBNORMAL << scale; - StorageType step = (max_value - MIN_SUBNORMAL) / COUNT; + StorageType step = (max_value - MIN_SUBNORMAL) / COUNT + 1; for (int signs = 0; signs < 4; ++signs) { for (StorageType v = MIN_SUBNORMAL, w = max_value; v <= max_value && w >= MIN_SUBNORMAL; v += step, w -= step) { diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt index b1927dbc19a3b..551f449c9c8db 100644 --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ 
-314,6 +314,24 @@ add_fp_unittest( -lpthread ) +add_fp_unittest( + hypotf16_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + hypotf16_test.cpp + COMPILE_OPTIONS + ${libc_opt_high_flag} + DEPENDS + .exhaustive_test + libc.src.math.hypotf16 + libc.src.__support.FPUtil.fp_bits + LINK_LIBRARIES + -lpthread +) + add_fp_unittest( fmod_generic_impl_test NO_RUN_POSTBUILD diff --git a/libc/test/src/math/exhaustive/hypotf16_test.cpp b/libc/test/src/math/exhaustive/hypotf16_test.cpp new file mode 100644 index 0000000000000..f79041e6dbd77 --- /dev/null +++ b/libc/test/src/math/exhaustive/hypotf16_test.cpp @@ -0,0 +1,67 @@ +//===-- Exhaustive test for hypotf16 --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exhaustive_test.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/Hypot.h" +#include "src/math/hypotf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +struct Hypotf16Checker : public virtual LIBC_NAMESPACE::testing::Test { + using FloatType = float16; + using FPBits = LIBC_NAMESPACE::fputil::FPBits; + using StorageType = typename FPBits::StorageType; + + uint64_t check(uint16_t x_start, uint16_t x_stop, uint16_t y_start, + uint16_t y_stop, mpfr::RoundingMode rounding) { + mpfr::ForceRoundingMode r(rounding); + if (!r.success) + return true; + uint16_t xbits = x_start; + uint64_t failed = 0; + do { + float16 x = FPBits(xbits).get_val(); + uint16_t ybits = xbits; + do { + float16 y = FPBits(ybits).get_val(); + bool correct = TEST_FP_EQ(LIBC_NAMESPACE::fputil::hypot(x, y), + LIBC_NAMESPACE::hypotf16(x, y)); + // Using MPFR will be 
much slower. + // mpfr::BinaryInput input{x, y}; + // bool correct = TEST_MPFR_MATCH_ROUNDING_SILENTLY( + // mpfr::Operation::Hypot, input, LIBC_NAMESPACE::hypotf16(x, y), + // 0.5, + // rounding); + failed += (!correct); + } while (ybits++ < y_stop); + } while (xbits++ < x_stop); + return failed; + } +}; + +using LlvmLibcHypotf16ExhaustiveTest = + LlvmLibcExhaustiveMathTest; + +// Range of both inputs: [0, inf] +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7C00U; + +TEST_F(LlvmLibcHypotf16ExhaustiveTest, PositiveRange) { + test_full_range_all_roundings(POS_START, POS_STOP, POS_START, POS_STOP); +} + +// Range of both inputs: [-0, -inf] +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xFC00U; + +TEST_F(LlvmLibcHypotf16ExhaustiveTest, NegativeRange) { + test_full_range_all_roundings(NEG_START, NEG_STOP, NEG_START, NEG_STOP); +} diff --git a/libc/test/src/math/hypotf16_test.cpp b/libc/test/src/math/hypotf16_test.cpp new file mode 100644 index 0000000000000..37d57471a3c74 --- /dev/null +++ b/libc/test/src/math/hypotf16_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for hypotf16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "HypotTest.h" + +#include "src/math/hypotf16.h" + +using LlvmLibcHypotf16Test = HypotTestTemplate; + +TEST_F(LlvmLibcHypotf16Test, SubnormalRange) { + test_subnormal_range(&LIBC_NAMESPACE::hypotf16); +} + +TEST_F(LlvmLibcHypotf16Test, NormalRange) { + test_normal_range(&LIBC_NAMESPACE::hypotf16); +} diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt index 838ed9e957ca7..d8f87e04c15d8 100644 --- a/libc/test/src/math/performance_testing/CMakeLists.txt +++ b/libc/test/src/math/performance_testing/CMakeLists.txt @@ -340,6 +340,18 @@ add_perf_binary( -fno-builtin ) +add_perf_binary( + hypotf16_perf + SRCS + hypotf16_perf.cpp + DEPENDS + .binary_op_single_output_diff + libc.src.math.hypotf16 + libc.src.__support.FPUtil.fp_bits + COMPILE_OPTIONS + -fno-builtin +) + add_perf_binary( hypotf_perf SRCS diff --git a/libc/test/src/math/performance_testing/hypotf16_perf.cpp b/libc/test/src/math/performance_testing/hypotf16_perf.cpp new file mode 100644 index 0000000000000..b53a9042171a6 --- /dev/null +++ b/libc/test/src/math/performance_testing/hypotf16_perf.cpp @@ -0,0 +1,16 @@ +//===-- Differential test for hypotf16 ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BinaryOpSingleOutputPerf.h" + +#include "src/__support/FPUtil/Hypot.h" +#include "src/math/hypotf16.h" + +BINARY_OP_SINGLE_OUTPUT_PERF(float16, float16, LIBC_NAMESPACE::hypotf16, + LIBC_NAMESPACE::fputil::hypot, + "hypotf16_perf.log") diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index b7601735c1109..a8c602b388504 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3121,6 +3121,19 @@ add_fp_unittest( libc.src.__support.macros.properties.architectures ) +add_fp_unittest( + hypotf16_test + SUITE + libc-math-smoke-tests + SRCS + hypotf16_test.cpp + HDRS + HypotTest.h + DEPENDS + libc.src.math.hypotf16 + libc.src.__support.FPUtil.fp_bits +) + add_fp_unittest( hypot_test SUITE diff --git a/libc/test/src/math/smoke/hypotf16_test.cpp b/libc/test/src/math/smoke/hypotf16_test.cpp new file mode 100644 index 0000000000000..b48b0930431de --- /dev/null +++ b/libc/test/src/math/smoke/hypotf16_test.cpp @@ -0,0 +1,17 @@ +//===-- Unittests for hypotf16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "HypotTest.h" + +#include "src/math/hypotf16.h" + +using LlvmLibcHypotf16Test = HypotTestTemplate; + +TEST_F(LlvmLibcHypotf16Test, SpecialNumbers) { + test_special_numbers(&LIBC_NAMESPACE::hypotf16); +} diff --git a/libcxx/include/__config b/libcxx/include/__config index 30fe0ef6a3b53..ea51d30dcda99 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -363,25 +363,22 @@ typedef __char32_t char32_t; # endif # if defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) || (defined(__MINGW32__) && !defined(_LIBCPP_BUILDING_LIBRARY)) -# define _LIBCPP_DLL_VIS # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI # elif defined(_LIBCPP_BUILDING_LIBRARY) -# define _LIBCPP_DLL_VIS __declspec(dllexport) # if defined(__MINGW32__) -# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __declspec(dllexport) # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # else # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS -# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS __declspec(dllexport) # endif -# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_OVERRIDABLE_FUNC_VIS __declspec(dllexport) # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport) # else -# define _LIBCPP_DLL_VIS __declspec(dllimport) -# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __declspec(dllimport) # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllimport) diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 709b31ed4e01a..4edc625b05cb0 100644 --- 
a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -766,9 +766,12 @@ static bool isPairRelaxable(ArrayRef relocs, size_t i) { // Relax code sequence. // From: // pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym) +// | %desc_pc_hi20(sym) // addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) +// | %desc_pc_lo12(sym) // To: -// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) +// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym) +// | %desc_pcrel_20(sym) // // From: // pcalau12i $a0, %got_pc_hi20(sym_got) @@ -786,7 +789,9 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, (rHi20.type == R_LARCH_TLS_GD_PC_HI20 && rLo12.type == R_LARCH_GOT_PC_LO12) || (rHi20.type == R_LARCH_TLS_LD_PC_HI20 && - rLo12.type == R_LARCH_GOT_PC_LO12))) + rLo12.type == R_LARCH_GOT_PC_LO12) || + (rHi20.type == R_LARCH_TLS_DESC_PC_HI20 && + rLo12.type == R_LARCH_TLS_DESC_PC_LO12))) return; // GOT references to absolute symbols can't be relaxed to use pcaddi in @@ -808,6 +813,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, dest = rHi20.sym->getVA(ctx); else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC) dest = ctx.in.got->getGlobalDynAddr(*rHi20.sym); + else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC) + dest = ctx.in.got->getTlsDescAddr(*rHi20.sym); else { Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr (" << rHi20.expr << ") against symbol " << rHi20.sym @@ -841,6 +848,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i, sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2; else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20) sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2; + else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20) + sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_DESC_PCREL20_S2; else sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2; sec.relaxAux->writes.push_back(insn(PCADDI, 
getD5(nextInsn), 0, 0)); @@ -947,6 +956,7 @@ static bool relax(Ctx &ctx, InputSection &sec) { case R_LARCH_GOT_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: case R_LARCH_TLS_LD_PC_HI20: + case R_LARCH_TLS_DESC_PC_HI20: // The overflow check for i+2 will be carried out in isPairRelaxable. if (isPairRelaxable(relocs, i)) relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); @@ -1081,6 +1091,11 @@ void LoongArch::finalizeRelax(int passes) const { write32le(p, aux.writes[writesIdx++]); r.expr = R_TLSGD_PC; break; + case R_LARCH_TLS_DESC_PCREL20_S2: + skip = 4; + write32le(p, aux.writes[writesIdx++]); + r.expr = R_TLSDESC_PC; + break; default: llvm_unreachable("unsupported type"); } diff --git a/lld/test/ELF/loongarch-relax-tlsdesc.s b/lld/test/ELF/loongarch-relax-tlsdesc.s new file mode 100644 index 0000000000000..9ce7c5881ca96 --- /dev/null +++ b/lld/test/ELF/loongarch-relax-tlsdesc.s @@ -0,0 +1,280 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.64.o +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax c.s -o c.64.o +# RUN: ld.lld --relax -shared -soname=c.64.so c.64.o -o c.64.so + +## Test the TLSDESC relaxation. +# RUN: ld.lld --relax -shared -z now a.64.o c.64.o -o a.64.so +# RUN: llvm-readobj -r -x .got a.64.so | FileCheck --check-prefix=GD64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -dr -h a.64.so | FileCheck %s --check-prefix=GD64 + +## FIXME: The transition from TLSDESC to IE/LE has not yet been implemented. +## Keep the dynamic relocations and hand them over to dynamic linker. 
+ +# RUN: ld.lld --relax -e 0 -z now a.64.o c.64.o -o a.64.le +# RUN: llvm-readobj -r -x .got a.64.le | FileCheck --check-prefix=LE64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -d -h a.64.le | FileCheck %s --check-prefix=LE64 + +# RUN: ld.lld --no-relax -e 0 -z now a.64.o c.64.o -o a.64.le.norelax +# RUN: llvm-objdump --no-show-raw-insn -d -h a.64.le.norelax | FileCheck %s --check-prefix=LE64-NORELAX + +# RUN: ld.lld --relax -e 0 -z now a.64.o c.64.so -o a.64.ie +# RUN: llvm-readobj -r -x .got a.64.ie | FileCheck --check-prefix=IE64-RELA %s +# RUN: llvm-objdump --no-show-raw-insn -d -h a.64.ie | FileCheck %s --check-prefix=IE64 + +# RUN: ld.lld --no-relax -e 0 -z now a.64.o c.64.so -o a.64.ie.norelax +# RUN: llvm-objdump --no-show-raw-insn -d -h a.64.ie.norelax | FileCheck %s --check-prefix=IE64-NORELAX + +# GD64-RELA: .rela.dyn { +# GD64-RELA-NEXT: 0x20460 R_LARCH_TLS_DESC64 - 0x7FF +# GD64-RELA-NEXT: 0x20430 R_LARCH_TLS_DESC64 a 0x0 +# GD64-RELA-NEXT: 0x20440 R_LARCH_TLS_DESC64 c 0x0 +# GD64-RELA-NEXT: 0x20450 R_LARCH_TLS_DESC64 d 0x0 +# GD64-RELA-NEXT: } +# GD64-RELA: Hex dump of section '.got': +# GD64-RELA-NEXT: 0x00020430 00000000 00000000 00000000 00000000 . +# GD64-RELA-NEXT: 0x00020440 00000000 00000000 00000000 00000000 . +# GD64-RELA-NEXT: 0x00020450 00000000 00000000 00000000 00000000 . +# GD64-RELA-NEXT: 0x00020460 00000000 00000000 00000000 00000000 . + +# GD64: .got 00000040 0000000000020430 + +## &.got[a]-. = 0x20430 - 0x10318 = 16454<<2 +# GD64: 10318: pcaddi $a0, 16454 +# GD64-NEXT: ld.d $ra, $a0, 0 +# GD64-NEXT: jirl $ra, $ra, 0 +# GD64-NEXT: add.d $a1, $a0, $tp + +## &.got[b]-. = 0x20430+48 - 0x10328: 0x10 pages, page offset 0x460 +## R_LARCH_RELAX does not appear in pairs. No relaxation. +# GD64: 10328: pcalau12i $a0, 16 +# GD64-NEXT: addi.d $a0, $a0, 1120 +# GD64-NEXT: ld.d $ra, $a0, 0 +# GD64-NEXT: jirl $ra, $ra, 0 +# GD64-NEXT: add.d $a2, $a0, $tp + +## &.got[c]-. 
= 0x20430+16 - 0x1033c: 0x10 pages, page offset 0x440 +## Without R_LARCH_RELAX relocation. No relaxation. +# GD64: 1033c: pcalau12i $a0, 16 +# GD64-NEXT: addi.d $t0, $zero, 0 +# GD64-NEXT: addi.d $a0, $a0, 1088 +# GD64-NEXT: addi.d $t0, $t0, 1 +# GD64-NEXT: ld.d $ra, $a0, 0 +# GD64-NEXT: addi.d $t0, $t0, 1 +# GD64-NEXT: jirl $ra, $ra, 0 +# GD64-NEXT: add.d $a3, $a0, $tp + +## &.got[d]-. = 0x20430+32 - 0x1035c = 16445<<2 +# GD64: 1035c: pcaddi $a0, 16445 +# GD64-NEXT: ld.d $ra, $a0, 0 +# GD64-NEXT: jirl $ra, $ra, 0 +# GD64-NEXT: add.d $a4, $a0, $tp + +# LE64-RELA: .rela.dyn { +# LE64-RELA-NEXT: 0x30280 R_LARCH_TLS_DESC64 - 0x8 +# LE64-RELA-NEXT: 0x30290 R_LARCH_TLS_DESC64 - 0x800 +# LE64-RELA-NEXT: 0x302A0 R_LARCH_TLS_DESC64 - 0x1000 +# LE64-RELA-NEXT: 0x302B0 R_LARCH_TLS_DESC64 - 0x7FF +# LE64-RELA-NEXT: } +# LE64-RELA: Hex dump of section '.got': +# LE64-RELA-NEXT: 0x00030280 00000000 00000000 00000000 00000000 . +# LE64-RELA-NEXT: 0x00030290 00000000 00000000 00000000 00000000 . +# LE64-RELA-NEXT: 0x000302a0 00000000 00000000 00000000 00000000 . +# LE64-RELA-NEXT: 0x000302b0 00000000 00000000 00000000 00000000 . + +# LE64: .got 00000040 0000000000030280 + +## &.got[a]-. = 0x30280 - 0x20228 = 16406<<2 +# LE64: 20228: pcaddi $a0, 16406 +# LE64-NEXT: ld.d $ra, $a0, 0 +# LE64-NEXT: jirl $ra, $ra, 0 +# LE64-NEXT: add.d $a1, $a0, $tp + +## &.got[b]-. = 0x30280+48 - 0x20238: 0x10 pages, page offset 0x2b0 +## R_LARCH_RELAX does not appear in pairs. No relaxation. +# LE64: 20238: pcalau12i $a0, 16 +# LE64-NEXT: addi.d $a0, $a0, 688 +# LE64-NEXT: ld.d $ra, $a0, 0 +# LE64-NEXT: jirl $ra, $ra, 0 +# LE64-NEXT: add.d $a2, $a0, $tp + +## &.got[c]-. = 0x30280+16 - 0x2024c: 0x10 pages, page offset 0x290 +## Without R_LARCH_RELAX relocation. No relaxation. 
+# LE64: 2024c: pcalau12i $a0, 16 +# LE64-NEXT: addi.d $t0, $zero, 0 +# LE64-NEXT: addi.d $a0, $a0, 656 +# LE64-NEXT: addi.d $t0, $t0, 1 +# LE64-NEXT: ld.d $ra, $a0, 0 +# LE64-NEXT: addi.d $t0, $t0, 1 +# LE64-NEXT: jirl $ra, $ra, 0 +# LE64-NEXT: add.d $a3, $a0, $tp + +## &.got[d]-. = 0x30280+32 - 0x2026c = 16397<<2 +# LE64: 2026c: pcaddi $a0, 16397 +# LE64-NEXT: ld.d $ra, $a0, 0 +# LE64-NEXT: jirl $ra, $ra, 0 +# LE64-NEXT: add.d $a4, $a0, $tp + +# LE64-NORELAX: .got 00000040 0000000000030288 + +## &.got[a]-. = 0x30288 - 0x20228 = 0x10 pages, page offset 0x288 +# LE64-NORELAX: 20228: pcalau12i $a0, 16 +# LE64-NORELAX-NEXT: addi.d $a0, $a0, 648 +# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# LE64-NORELAX-NEXT: add.d $a1, $a0, $tp + +## &.got[b]-. = 0x30288+48 - 0x2023c: 0x10 pages, page offset 0x2b8 +## R_LARCH_RELAX does not appear in pairs. No relaxation. +# LE64-NORELAX: 2023c: pcalau12i $a0, 16 +# LE64-NORELAX-NEXT: addi.d $a0, $a0, 696 +# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# LE64-NORELAX-NEXT: add.d $a2, $a0, $tp + +## &.got[c]-. = 0x30288+16 - 0x20250: 0x10 pages, page offset 0x298 +## Without R_LARCH_RELAX relocation. No relaxation. +# LE64-NORELAX: 20250: pcalau12i $a0, 16 +# LE64-NORELAX-NEXT: addi.d $t0, $zero, 0 +# LE64-NORELAX-NEXT: addi.d $a0, $a0, 664 +# LE64-NORELAX-NEXT: addi.d $t0, $t0, 1 +# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# LE64-NORELAX-NEXT: addi.d $t0, $t0, 1 +# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# LE64-NORELAX-NEXT: add.d $a3, $a0, $tp + +## &.got[d]-. 
= 0x30288+32 - 0x20270: 0x10 pages, page offset 0x2a8 +# LE64-NORELAX: 20270: pcalau12i $a0, 16 +# LE64-NORELAX-NEXT: addi.d $a0, $a0, 680 +# LE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# LE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# LE64-NORELAX-NEXT: add.d $a4, $a0, $tp + +# IE64-RELA: .rela.dyn { +# IE64-RELA-NEXT: 0x30430 R_LARCH_TLS_DESC64 - 0x8 +# IE64-RELA-NEXT: 0x30460 R_LARCH_TLS_DESC64 - 0x7FF +# IE64-RELA-NEXT: 0x30440 R_LARCH_TLS_DESC64 c 0x0 +# IE64-RELA-NEXT: 0x30450 R_LARCH_TLS_DESC64 d 0x0 +# IE64-RELA-NEXT: } +# IE64-RELA: Hex dump of section '.got': +# IE64-RELA-NEXT: 0x00030430 00000000 00000000 00000000 00000000 . +# IE64-RELA-NEXT: 0x00030440 00000000 00000000 00000000 00000000 . +# IE64-RELA-NEXT: 0x00030450 00000000 00000000 00000000 00000000 . +# IE64-RELA-NEXT: 0x00030460 00000000 00000000 00000000 00000000 . + +# IE64: .got 00000040 0000000000030430 + +## a and b are optimized to use LE. c and d are optimized to IE. +## &.got[a]-. = 0x30430 - 0x202f8 = 16462<<2 +# IE64: 202f8: pcaddi $a0, 16462 +# IE64-NEXT: ld.d $ra, $a0, 0 +# IE64-NEXT: jirl $ra, $ra, 0 +# IE64-NEXT: add.d $a1, $a0, $tp + +## &.got[b]-. = 0x30430+48 - 0x20308: 0x10 pages, page offset 0x460 +## R_LARCH_RELAX does not appear in pairs. No relaxation. +# IE64: 20308: pcalau12i $a0, 16 +# IE64-NEXT: addi.d $a0, $a0, 1120 +# IE64-NEXT: ld.d $ra, $a0, 0 +# IE64-NEXT: jirl $ra, $ra, 0 +# IE64-NEXT: add.d $a2, $a0, $tp + +## &.got[c]-. = 0x30430+16 - 0x2031c: 0x10 pages, page offset 0x440 +## Without R_LARCH_RELAX relocation. No relaxation. +# IE64: 2031c: pcalau12i $a0, 16 +# IE64-NEXT: addi.d $t0, $zero, 0 +# IE64-NEXT: addi.d $a0, $a0, 1088 +# IE64-NEXT: addi.d $t0, $t0, 1 +# IE64-NEXT: ld.d $ra, $a0, 0 +# IE64-NEXT: addi.d $t0, $t0, 1 +# IE64-NEXT: jirl $ra, $ra, 0 +# IE64-NEXT: add.d $a3, $a0, $tp + +## &.got[d]-. 
= 0x30430+32 - 0x2033c = 16453<<2 +# IE64: 2033c: pcaddi $a0, 16453 +# IE64-NEXT: ld.d $ra, $a0, 0 +# IE64-NEXT: jirl $ra, $ra, 0 +# IE64-NEXT: add.d $a4, $a0, $tp + +# IE64-NORELAX: .got 00000040 0000000000030438 + +## &.got[a]-. = 0x30438 - 0x202f8 = 0x10 pages, page offset 0x438 +# IE64-NORELAX: 202f8: pcalau12i $a0, 16 +# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1080 +# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# IE64-NORELAX-NEXT: add.d $a1, $a0, $tp + +## &.got[b]-. = 0x30438+48 - 0x2030c: 0x10 pages, page offset 0x468 +## R_LARCH_RELAX does not appear in pairs. No relaxation. +# IE64-NORELAX: 2030c: pcalau12i $a0, 16 +# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1128 +# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# IE64-NORELAX-NEXT: add.d $a2, $a0, $tp + +## &.got[c]-. = 0x30438+16 - 0x20320: 0x10 pages, page offset 0x448 +## Without R_LARCH_RELAX relocation. No relaxation. +# IE64-NORELAX: 20320: pcalau12i $a0, 16 +# IE64-NORELAX-NEXT: addi.d $t0, $zero, 0 +# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1096 +# IE64-NORELAX-NEXT: addi.d $t0, $t0, 1 +# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# IE64-NORELAX-NEXT: addi.d $t0, $t0, 1 +# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# IE64-NORELAX-NEXT: add.d $a3, $a0, $tp + +## &.got[d]-. = 0x30438+32 - 0x20340: 0x10 pages, page offset 0x458 +# IE64-NORELAX: 20340: pcalau12i $a0, 16 +# IE64-NORELAX-NEXT: addi.d $a0, $a0, 1112 +# IE64-NORELAX-NEXT: ld.d $ra, $a0, 0 +# IE64-NORELAX-NEXT: jirl $ra, $ra, 0 +# IE64-NORELAX-NEXT: add.d $a4, $a0, $tp + +#--- a.s +la.tls.desc $a0, a +add.d $a1, $a0, $tp + +# ADDI.D does not have R_LARCH_RELAX. No relaxation. +pcalau12i $a0, %desc_pc_hi20(b) +.reloc .-4, R_LARCH_RELAX, 0 +addi.d $a0, $a0, %desc_pc_lo12(b) +ld.d $ra, $a0, %desc_ld(b) +jirl $ra, $ra, %desc_call(b) +add.d $a2, $a0, $tp + +# TLSDESC to LE. No relaxation. 
+pcalau12i $a0, %desc_pc_hi20(c) +addi.d $t0, $zero, 0 +addi.d $a0, $a0, %desc_pc_lo12(c) +addi.d $t0, $t0, 1 +ld.d $ra, $a0, %desc_ld(c) +addi.d $t0, $t0, 1 +jirl $ra, $ra, %desc_call(c) +add.d $a3, $a0, $tp + +# PCALAU12I and ADDI.D have R_LARCH_RELAX. We perform relaxation. +pcalau12i $a0, %desc_pc_hi20(d) +.reloc .-4, R_LARCH_RELAX, 0 +addi.d $a0, $a0, %desc_pc_lo12(d) +.reloc .-4, R_LARCH_RELAX, 0 +ld.d $ra, $a0, %desc_ld(d) +jirl $ra, $ra, %desc_call(d) +add.d $a4, $a0, $tp + +.section .tbss,"awT",@nobits +.globl a +.zero 8 +a: +.zero 2039 ## Place b at 0x7ff +b: +.zero 1 + +#--- c.s +.section .tbss,"awT",@nobits +.globl c, d +c: +.zero 2048 ## Place d at 0x1000 +d: +.zero 4 diff --git a/lldb/include/lldb/Symbol/UnwindPlan.h b/lldb/include/lldb/Symbol/UnwindPlan.h index db9aade93b6ba..9adda27b8f928 100644 --- a/lldb/include/lldb/Symbol/UnwindPlan.h +++ b/lldb/include/lldb/Symbol/UnwindPlan.h @@ -467,11 +467,12 @@ class UnwindPlan { void InsertRow(Row row, bool replace_existing = false); // Returns a pointer to the best row for the given offset into the function's - // instructions. If offset is -1 it indicates that the function start is - // unknown - the final row in the UnwindPlan is returned. In practice, the - // UnwindPlan for a function with no known start address will be the - // architectural default UnwindPlan which will only have one row. - const UnwindPlan::Row *GetRowForFunctionOffset(int offset) const; + // instructions. If offset is std::nullopt it indicates that the function + // start is unknown - the final row in the UnwindPlan is returned. In + // practice, the UnwindPlan for a function with no known start address will be + // the architectural default UnwindPlan which will only have one row. 
+ const UnwindPlan::Row * + GetRowForFunctionOffset(std::optional offset) const; lldb::RegisterKind GetRegisterKind() const { return m_register_kind; } diff --git a/lldb/include/lldb/Target/RegisterContextUnwind.h b/lldb/include/lldb/Target/RegisterContextUnwind.h index 6cd918fedc003..c4ae29e657bfb 100644 --- a/lldb/include/lldb/Target/RegisterContextUnwind.h +++ b/lldb/include/lldb/Target/RegisterContextUnwind.h @@ -228,18 +228,17 @@ class RegisterContextUnwind : public lldb_private::RegisterContext { lldb_private::Address m_start_pc; lldb_private::Address m_current_pc; - int m_current_offset; // how far into the function we've executed; -1 if - // unknown - // 0 if no instructions have been executed yet. - - // 0 if no instructions have been executed yet. - // On architectures where the return address on the stack points - // to the instruction after the CALL, this value will have 1 - // subtracted from it. Else a function that ends in a CALL will - // have an offset pointing into the next function's address range. + /// How far into the function we've executed. 0 if no instructions have been + /// executed yet, std::nullopt if unknown. + std::optional m_current_offset; + + // How far into the function we've executed. 0 if no instructions have been + // executed yet, std::nullopt if unknown. On architectures where the return + // address on the stack points to the instruction after the CALL, this value + // will have 1 subtracted from it. Otherwise, a function that ends in a CALL + // will have an offset pointing into the next function's address range. // m_current_pc has the actual address of the "current" pc. 
- int m_current_offset_backed_up_one; // how far into the function we've - // executed; -1 if unknown + std::optional m_current_offset_backed_up_one; bool m_behaves_like_zeroth_frame; // this frame behaves like frame zero diff --git a/lldb/source/Core/Telemetry.cpp b/lldb/source/Core/Telemetry.cpp index 62ebdfc027d81..c7789d43c7899 100644 --- a/lldb/source/Core/Telemetry.cpp +++ b/lldb/source/Core/Telemetry.cpp @@ -10,7 +10,6 @@ #include "lldb/Core/Telemetry.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/UUID.h" -#include "lldb/Version/Version.h" #include "lldb/lldb-enumerations.h" #include "lldb/lldb-forward.h" #include "llvm/ADT/StringRef.h" diff --git a/lldb/source/Symbol/UnwindPlan.cpp b/lldb/source/Symbol/UnwindPlan.cpp index 48089cbdecd97..f2846eb927bf8 100644 --- a/lldb/source/Symbol/UnwindPlan.cpp +++ b/lldb/source/Symbol/UnwindPlan.cpp @@ -417,9 +417,10 @@ void UnwindPlan::InsertRow(Row row, bool replace_existing) { } } -const UnwindPlan::Row *UnwindPlan::GetRowForFunctionOffset(int offset) const { - auto it = offset == -1 ? m_row_list.end() - : llvm::upper_bound(m_row_list, offset, RowLess()); +const UnwindPlan::Row * +UnwindPlan::GetRowForFunctionOffset(std::optional offset) const { + auto it = offset ? llvm::upper_bound(m_row_list, *offset, RowLess()) + : m_row_list.end(); if (it == m_row_list.begin()) return nullptr; // upper_bound returns the row strictly greater than our desired offset, which diff --git a/lldb/source/Target/RegisterContextUnwind.cpp b/lldb/source/Target/RegisterContextUnwind.cpp index a035c57fbfc1c..cb3d7ee479890 100644 --- a/lldb/source/Target/RegisterContextUnwind.cpp +++ b/lldb/source/Target/RegisterContextUnwind.cpp @@ -94,8 +94,9 @@ bool RegisterContextUnwind::IsUnwindPlanValidForCurrentPC( return true; } - // if m_current_offset <= 0, we've got nothing else to try - if (m_current_offset <= 0) + // If we don't have an offset or we're at the start of the function, we've got + // nothing else to try. 
+ if (!m_current_offset || m_current_offset == 0) return false; // check pc - 1 to see if it's valid @@ -198,8 +199,8 @@ void RegisterContextUnwind::InitializeZerothFrame() { m_current_offset_backed_up_one = m_current_offset; } else { m_start_pc = m_current_pc; - m_current_offset = -1; - m_current_offset_backed_up_one = -1; + m_current_offset = std::nullopt; + m_current_offset_backed_up_one = std::nullopt; } // We've set m_frame_type and m_sym_ctx before these calls. @@ -437,8 +438,8 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { m_frame_type = eNormalFrame; } m_all_registers_available = false; - m_current_offset = -1; - m_current_offset_backed_up_one = -1; + m_current_offset = std::nullopt; + m_current_offset_backed_up_one = std::nullopt; RegisterKind row_register_kind = m_full_unwind_plan_sp->GetRegisterKind(); if (const UnwindPlan::Row *row = m_full_unwind_plan_sp->GetRowForFunctionOffset(0)) { @@ -569,16 +570,16 @@ void RegisterContextUnwind::InitializeNonZerothFrame() { m_current_offset = pc - m_start_pc.GetLoadAddress(&process->GetTarget()); m_current_offset_backed_up_one = m_current_offset; if (decr_pc_and_recompute_addr_range && - m_current_offset_backed_up_one > 0) { - m_current_offset_backed_up_one--; + m_current_offset_backed_up_one != 0) { + --*m_current_offset_backed_up_one; if (m_sym_ctx_valid) { m_current_pc.SetLoadAddress(pc - 1, &process->GetTarget()); } } } else { m_start_pc = m_current_pc; - m_current_offset = -1; - m_current_offset_backed_up_one = -1; + m_current_offset = std::nullopt; + m_current_offset_backed_up_one = std::nullopt; } if (IsTrapHandlerSymbol(process, m_sym_ctx)) { @@ -746,7 +747,7 @@ bool RegisterContextUnwind::BehavesLikeZerothFrame() const { // 2. m_sym_ctx should already be filled in, and // 3. m_current_pc should have the current pc value for this frame // 4. 
m_current_offset_backed_up_one should have the current byte offset into -// the function, maybe backed up by 1, -1 if unknown +// the function, maybe backed up by 1, std::nullopt if unknown UnwindPlanSP RegisterContextUnwind::GetFastUnwindPlanForFrame() { UnwindPlanSP unwind_plan_sp; @@ -790,7 +791,7 @@ UnwindPlanSP RegisterContextUnwind::GetFastUnwindPlanForFrame() { // 2. m_sym_ctx should already be filled in, and // 3. m_current_pc should have the current pc value for this frame // 4. m_current_offset_backed_up_one should have the current byte offset into -// the function, maybe backed up by 1, -1 if unknown +// the function, maybe backed up by 1, std::nullopt if unknown UnwindPlanSP RegisterContextUnwind::GetFullUnwindPlanForFrame() { UnwindPlanSP unwind_plan_sp; diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp index 0d63e37d3eafb..87fd0df018b65 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp +++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "Protocol/ProtocolBase.h" -#include "lldb/lldb-enumerations.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" @@ -32,8 +31,11 @@ static bool mapRaw(const json::Value &Params, StringLiteral Prop, namespace lldb_dap::protocol { -FLAGS_ENUM(MessageType){eMessageTypeRequest, eMessageTypeResponse, - eMessageTypeEvent}; +enum MessageType : unsigned { + eMessageTypeRequest, + eMessageTypeResponse, + eMessageTypeEvent +}; bool fromJSON(const json::Value &Params, MessageType &M, json::Path P) { auto rawType = Params.getAsString(); diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h index 5ac68e38cb9c4..2c647610de11c 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolBase.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolBase.h @@ -20,7 +20,6 @@ #ifndef 
LLDB_TOOLS_LLDB_DAP_PROTOCOL_H #define LLDB_TOOLS_LLDB_DAP_PROTOCOL_H -#include "lldb/lldb-enumerations.h" #include "llvm/Support/JSON.h" #include #include @@ -65,11 +64,11 @@ struct Event { llvm::json::Value toJSON(const Event &); bool fromJSON(const llvm::json::Value &, Event &, llvm::json::Path); -FLAGS_ENUM(ResponseMessage){ - /// The request was cancelled - eResponseMessageCancelled, - /// The request may be retried once the adapter is in a 'stopped' state - eResponseMessageNotStopped, +enum ResponseMessage : unsigned { + /// The request was cancelled + eResponseMessageCancelled, + /// The request may be retried once the adapter is in a 'stopped' state + eResponseMessageNotStopped, }; /// Response for a request. diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h index 116cf8516c52e..927106997953a 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolRequests.h @@ -22,7 +22,6 @@ #include "Protocol/ProtocolBase.h" #include "Protocol/ProtocolTypes.h" -#include "lldb/lldb-enumerations.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Support/JSON.h" #include @@ -57,26 +56,26 @@ bool fromJSON(const llvm::json::Value &, DisconnectArguments &, using DisconnectResponse = VoidResponse; /// Features supported by DAP clients. -FLAGS_ENUM(ClientFeature){ - eClientFeatureVariableType, - eClientFeatureVariablePaging, - eClientFeatureRunInTerminalRequest, - eClientFeatureMemoryReferences, - eClientFeatureProgressReporting, - eClientFeatureInvalidatedEvent, - eClientFeatureMemoryEvent, - /// Client supports the `argsCanBeInterpretedByShell` attribute on the - /// `runInTerminal` request. - eClientFeatureArgsCanBeInterpretedByShell, - eClientFeatureStartDebuggingRequest, - /// The client will interpret ANSI escape sequences in the display of - /// `OutputEvent.output` and `Variable.value` fields when - /// `Capabilities.supportsANSIStyling` is also enabled. 
- eClientFeatureANSIStyling, +enum ClientFeature : unsigned { + eClientFeatureVariableType, + eClientFeatureVariablePaging, + eClientFeatureRunInTerminalRequest, + eClientFeatureMemoryReferences, + eClientFeatureProgressReporting, + eClientFeatureInvalidatedEvent, + eClientFeatureMemoryEvent, + /// Client supports the `argsCanBeInterpretedByShell` attribute on the + /// `runInTerminal` request. + eClientFeatureArgsCanBeInterpretedByShell, + eClientFeatureStartDebuggingRequest, + /// The client will interpret ANSI escape sequences in the display of + /// `OutputEvent.output` and `Variable.value` fields when + /// `Capabilities.supportsANSIStyling` is also enabled. + eClientFeatureANSIStyling, }; /// Format of paths reported by the debug adapter. -FLAGS_ENUM(PathFormat){ePatFormatPath, ePathFormatURI}; +enum PathFormat : unsigned { ePatFormatPath, ePathFormatURI }; /// Arguments for `initialize` request. struct InitializeRequestArguments { diff --git a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h index 463f9dbbaf4ea..8f38c524ea649 100644 --- a/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h +++ b/lldb/tools/lldb-dap/Protocol/ProtocolTypes.h @@ -20,7 +20,6 @@ #ifndef LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_TYPES_H #define LLDB_TOOLS_LLDB_DAP_PROTOCOL_PROTOCOL_TYPES_H -#include "lldb/lldb-enumerations.h" #include "llvm/ADT/DenseSet.h" #include "llvm/Support/JSON.h" #include @@ -57,8 +56,12 @@ struct ExceptionBreakpointsFilter { }; llvm::json::Value toJSON(const ExceptionBreakpointsFilter &); -FLAGS_ENUM(ColumnType){eColumnTypeString, eColumnTypeNumber, eColumnTypeBoolean, - eColumnTypeTimestamp}; +enum ColumnType : unsigned { + eColumnTypeString, + eColumnTypeNumber, + eColumnTypeBoolean, + eColumnTypeTimestamp +}; /// A ColumnDescriptor specifies what module attribute to show in a column of /// the modules view, how to format it, and what the column’s label should be. 
@@ -87,23 +90,27 @@ llvm::json::Value toJSON(const ColumnDescriptor &); /// Names of checksum algorithms that may be supported by a debug adapter. /// Values: ‘MD5’, ‘SHA1’, ‘SHA256’, ‘timestamp’. -FLAGS_ENUM(ChecksumAlgorithm){eChecksumAlgorithmMD5, eChecksumAlgorithmSHA1, - eChecksumAlgorithmSHA256, - eChecksumAlgorithmTimestamp}; +enum ChecksumAlgorithm : unsigned { + eChecksumAlgorithmMD5, + eChecksumAlgorithmSHA1, + eChecksumAlgorithmSHA256, + eChecksumAlgorithmTimestamp +}; llvm::json::Value toJSON(const ChecksumAlgorithm &); /// Describes one or more type of breakpoint a BreakpointMode applies to. This /// is a non-exhaustive enumeration and may expand as future breakpoint types /// are added. -FLAGS_ENUM(BreakpointModeApplicability){ - /// In `SourceBreakpoint`'s. - eBreakpointModeApplicabilitySource, - /// In exception breakpoints applied in the `ExceptionFilterOptions`. - eBreakpointModeApplicabilityException, - /// In data breakpoints requested in the `DataBreakpointInfo` request. - eBreakpointModeApplicabilityData, - /// In `InstructionBreakpoint`'s. - eBreakpointModeApplicabilityInstruction}; +enum BreakpointModeApplicability : unsigned { + /// In `SourceBreakpoint`'s. + eBreakpointModeApplicabilitySource, + /// In exception breakpoints applied in the `ExceptionFilterOptions`. + eBreakpointModeApplicabilityException, + /// In data breakpoints requested in the `DataBreakpointInfo` request. + eBreakpointModeApplicabilityData, + /// In `InstructionBreakpoint`'s. + eBreakpointModeApplicabilityInstruction +}; llvm::json::Value toJSON(const BreakpointModeApplicability &); /// A `BreakpointMode` is provided as a option when setting breakpoints on @@ -126,101 +133,101 @@ struct BreakpointMode { llvm::json::Value toJSON(const BreakpointMode &); /// Debug Adapter Features flags supported by lldb-dap. -FLAGS_ENUM(AdapterFeature){ - /// The debug adapter supports ANSI escape sequences in styling of - /// `OutputEvent.output` and `Variable.value` fields. 
- eAdapterFeatureANSIStyling, - /// The debug adapter supports the `breakpointLocations` request. - eAdapterFeatureBreakpointLocationsRequest, - /// The debug adapter supports the `cancel` request. - eAdapterFeatureCancelRequest, - /// The debug adapter supports the `clipboard` context value in the - /// `evaluate` request. - eAdapterFeatureClipboardContext, - /// The debug adapter supports the `completions` request. - eAdapterFeatureCompletionsRequest, - /// The debug adapter supports conditional breakpoints. - eAdapterFeatureConditionalBreakpoints, - /// The debug adapter supports the `configurationDone` request. - eAdapterFeatureConfigurationDoneRequest, - /// The debug adapter supports the `asAddress` and `bytes` fields in the - /// `dataBreakpointInfo` request. - eAdapterFeatureDataBreakpointBytes, - /// The debug adapter supports data breakpoints. - eAdapterFeatureDataBreakpoints, - /// The debug adapter supports the delayed loading of parts of the stack, - /// which requires that both the `startFrame` and `levels` arguments and the - /// `totalFrames` result of the `stackTrace` request are supported. - eAdapterFeatureDelayedStackTraceLoading, - /// The debug adapter supports the `disassemble` request. - eAdapterFeatureDisassembleRequest, - /// The debug adapter supports a (side effect free) `evaluate` request for - /// data hovers. - eAdapterFeatureEvaluateForHovers, - /// The debug adapter supports `filterOptions` as an argument on the - /// `setExceptionBreakpoints` request. - eAdapterFeatureExceptionFilterOptions, - /// The debug adapter supports the `exceptionInfo` request. - eAdapterFeatureExceptionInfoRequest, - /// The debug adapter supports `exceptionOptions` on the - /// `setExceptionBreakpoints` request. - eAdapterFeatureExceptionOptions, - /// The debug adapter supports function breakpoints. - eAdapterFeatureFunctionBreakpoints, - /// The debug adapter supports the `gotoTargets` request. 
- eAdapterFeatureGotoTargetsRequest, - /// The debug adapter supports breakpoints that break execution after a - /// specified number of hits. - eAdapterFeatureHitConditionalBreakpoints, - /// The debug adapter supports adding breakpoints based on instruction - /// references. - eAdapterFeatureInstructionBreakpoints, - /// The debug adapter supports the `loadedSources` request. - eAdapterFeatureLoadedSourcesRequest, - /// The debug adapter supports log points by interpreting the `logMessage` - /// attribute of the `SourceBreakpoint`. - eAdapterFeatureLogPoints, - /// The debug adapter supports the `modules` request. - eAdapterFeatureModulesRequest, - /// The debug adapter supports the `readMemory` request. - eAdapterFeatureReadMemoryRequest, - /// The debug adapter supports restarting a frame. - eAdapterFeatureRestartFrame, - /// The debug adapter supports the `restart` request. In this case a client - /// should not implement `restart` by terminating and relaunching the - /// adapter but by calling the `restart` request. - eAdapterFeatureRestartRequest, - /// The debug adapter supports the `setExpression` request. - eAdapterFeatureSetExpression, - /// The debug adapter supports setting a variable to a value. - eAdapterFeatureSetVariable, - /// The debug adapter supports the `singleThread` property on the execution - /// requests (`continue`, `next`, `stepIn`, `stepOut`, `reverseContinue`, - /// `stepBack`). - eAdapterFeatureSingleThreadExecutionRequests, - /// The debug adapter supports stepping back via the `stepBack` and - /// `reverseContinue` requests. - eAdapterFeatureStepBack, - /// The debug adapter supports the `stepInTargets` request. - eAdapterFeatureStepInTargetsRequest, - /// The debug adapter supports stepping granularities (argument - /// `granularity`) for the stepping requests. - eAdapterFeatureSteppingGranularity, - /// The debug adapter supports the `terminate` request. 
- eAdapterFeatureTerminateRequest, - /// The debug adapter supports the `terminateThreads` request. - eAdapterFeatureTerminateThreadsRequest, - /// The debug adapter supports the `suspendDebuggee` attribute on the - /// `disconnect` request. - eAdapterFeatureSuspendDebuggee, - /// The debug adapter supports a `format` attribute on the `stackTrace`, - /// `variables`, and `evaluate` requests. - eAdapterFeatureValueFormattingOptions, - /// The debug adapter supports the `writeMemory` request. - eAdapterFeatureWriteMemoryRequest, - /// The debug adapter supports the `terminateDebuggee` attribute on the - /// `disconnect` request. - eAdapterFeatureTerminateDebuggee, +enum AdapterFeature : unsigned { + /// The debug adapter supports ANSI escape sequences in styling of + /// `OutputEvent.output` and `Variable.value` fields. + eAdapterFeatureANSIStyling, + /// The debug adapter supports the `breakpointLocations` request. + eAdapterFeatureBreakpointLocationsRequest, + /// The debug adapter supports the `cancel` request. + eAdapterFeatureCancelRequest, + /// The debug adapter supports the `clipboard` context value in the + /// `evaluate` request. + eAdapterFeatureClipboardContext, + /// The debug adapter supports the `completions` request. + eAdapterFeatureCompletionsRequest, + /// The debug adapter supports conditional breakpoints. + eAdapterFeatureConditionalBreakpoints, + /// The debug adapter supports the `configurationDone` request. + eAdapterFeatureConfigurationDoneRequest, + /// The debug adapter supports the `asAddress` and `bytes` fields in the + /// `dataBreakpointInfo` request. + eAdapterFeatureDataBreakpointBytes, + /// The debug adapter supports data breakpoints. + eAdapterFeatureDataBreakpoints, + /// The debug adapter supports the delayed loading of parts of the stack, + /// which requires that both the `startFrame` and `levels` arguments and the + /// `totalFrames` result of the `stackTrace` request are supported. 
+ eAdapterFeatureDelayedStackTraceLoading, + /// The debug adapter supports the `disassemble` request. + eAdapterFeatureDisassembleRequest, + /// The debug adapter supports a (side effect free) `evaluate` request for + /// data hovers. + eAdapterFeatureEvaluateForHovers, + /// The debug adapter supports `filterOptions` as an argument on the + /// `setExceptionBreakpoints` request. + eAdapterFeatureExceptionFilterOptions, + /// The debug adapter supports the `exceptionInfo` request. + eAdapterFeatureExceptionInfoRequest, + /// The debug adapter supports `exceptionOptions` on the + /// `setExceptionBreakpoints` request. + eAdapterFeatureExceptionOptions, + /// The debug adapter supports function breakpoints. + eAdapterFeatureFunctionBreakpoints, + /// The debug adapter supports the `gotoTargets` request. + eAdapterFeatureGotoTargetsRequest, + /// The debug adapter supports breakpoints that break execution after a + /// specified number of hits. + eAdapterFeatureHitConditionalBreakpoints, + /// The debug adapter supports adding breakpoints based on instruction + /// references. + eAdapterFeatureInstructionBreakpoints, + /// The debug adapter supports the `loadedSources` request. + eAdapterFeatureLoadedSourcesRequest, + /// The debug adapter supports log points by interpreting the `logMessage` + /// attribute of the `SourceBreakpoint`. + eAdapterFeatureLogPoints, + /// The debug adapter supports the `modules` request. + eAdapterFeatureModulesRequest, + /// The debug adapter supports the `readMemory` request. + eAdapterFeatureReadMemoryRequest, + /// The debug adapter supports restarting a frame. + eAdapterFeatureRestartFrame, + /// The debug adapter supports the `restart` request. In this case a client + /// should not implement `restart` by terminating and relaunching the + /// adapter but by calling the `restart` request. + eAdapterFeatureRestartRequest, + /// The debug adapter supports the `setExpression` request. 
+ eAdapterFeatureSetExpression, + /// The debug adapter supports setting a variable to a value. + eAdapterFeatureSetVariable, + /// The debug adapter supports the `singleThread` property on the execution + /// requests (`continue`, `next`, `stepIn`, `stepOut`, `reverseContinue`, + /// `stepBack`). + eAdapterFeatureSingleThreadExecutionRequests, + /// The debug adapter supports stepping back via the `stepBack` and + /// `reverseContinue` requests. + eAdapterFeatureStepBack, + /// The debug adapter supports the `stepInTargets` request. + eAdapterFeatureStepInTargetsRequest, + /// The debug adapter supports stepping granularities (argument + /// `granularity`) for the stepping requests. + eAdapterFeatureSteppingGranularity, + /// The debug adapter supports the `terminate` request. + eAdapterFeatureTerminateRequest, + /// The debug adapter supports the `terminateThreads` request. + eAdapterFeatureTerminateThreadsRequest, + /// The debug adapter supports the `suspendDebuggee` attribute on the + /// `disconnect` request. + eAdapterFeatureSuspendDebuggee, + /// The debug adapter supports a `format` attribute on the `stackTrace`, + /// `variables`, and `evaluate` requests. + eAdapterFeatureValueFormattingOptions, + /// The debug adapter supports the `writeMemory` request. + eAdapterFeatureWriteMemoryRequest, + /// The debug adapter supports the `terminateDebuggee` attribute on the + /// `disconnect` request. + eAdapterFeatureTerminateDebuggee, }; /// Information about the capabilities of a debug adapter. @@ -261,10 +268,10 @@ struct Capabilities { }; llvm::json::Value toJSON(const Capabilities &); -FLAGS_ENUM(PresentationHint){ - ePresentationHintNormal, - ePresentationHintEmphasize, - ePresentationHintDeemphasize, +enum PresentationHint : unsigned { + ePresentationHintNormal, + ePresentationHintEmphasize, + ePresentationHintDeemphasize, }; /// A `Source` is a descriptor for source code. 
It is returned from the debug diff --git a/lldb/unittests/Disassembler/ARM/CMakeLists.txt b/lldb/unittests/Disassembler/ARM/CMakeLists.txt deleted file mode 100644 index 91af06fa19d6f..0000000000000 --- a/lldb/unittests/Disassembler/ARM/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_lldb_unittest(DisassemblerTests - TestArm64Disassembly.cpp - TestArmv7Disassembly.cpp - LINK_LIBS - lldbCore - lldbSymbol - lldbTarget - lldbPluginDisassemblerLLVMC - lldbPluginProcessUtility - LINK_COMPONENTS - Support - ${LLVM_TARGETS_TO_BUILD}) diff --git a/lldb/unittests/Disassembler/CMakeLists.txt b/lldb/unittests/Disassembler/CMakeLists.txt index 208f1807427f4..2a76158bf90fd 100644 --- a/lldb/unittests/Disassembler/CMakeLists.txt +++ b/lldb/unittests/Disassembler/CMakeLists.txt @@ -1,11 +1,34 @@ +set(disas_srcs "") + if("ARM" IN_LIST LLVM_TARGETS_TO_BUILD) - add_subdirectory(ARM) + list(APPEND + ARM/TestArm64Disassembly.cpp + ARM/TestArmv7Disassembly.cpp + ) endif() if("X86" IN_LIST LLVM_TARGETS_TO_BUILD) - add_subdirectory(x86) + list(APPEND disas_srcs + x86/TestGetControlFlowKindx86.cpp + ) endif() if("RISCV" IN_LIST LLVM_TARGETS_TO_BUILD) - add_subdirectory(RISCV) + list(APPEND disas_srcs + RISCV/TestMCDisasmInstanceRISCV.cpp + ) +endif() + +if (disas_srcs) + add_lldb_unittest(DisassemblerTests + ${disas_srcs} + LINK_LIBS + lldbCore + lldbSymbol + lldbTarget + lldbPluginDisassemblerLLVMC + lldbPluginProcessUtility + LINK_COMPONENTS + Support + ${LLVM_TARGETS_TO_BUILD}) endif() diff --git a/lldb/unittests/Disassembler/RISCV/CMakeLists.txt b/lldb/unittests/Disassembler/RISCV/CMakeLists.txt deleted file mode 100644 index 5bcc3e948335c..0000000000000 --- a/lldb/unittests/Disassembler/RISCV/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_lldb_unittest(MCDisasmInstanceRISCVTests - TestMCDisasmInstanceRISCV.cpp - LINK_LIBS - lldbCore - lldbSymbol - lldbTarget - lldbPluginDisassemblerLLVMC - lldbPluginProcessUtility - LINK_COMPONENTS - Support - ${LLVM_TARGETS_TO_BUILD} - ) diff 
--git a/lldb/unittests/Disassembler/x86/CMakeLists.txt b/lldb/unittests/Disassembler/x86/CMakeLists.txt deleted file mode 100644 index 31d84cf5d8365..0000000000000 --- a/lldb/unittests/Disassembler/x86/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -add_lldb_unittest(GetControlFlowKindx86Tests - TestGetControlFlowKindx86.cpp - LINK_LIBS - lldbCore - lldbSymbol - lldbTarget - lldbPluginDisassemblerLLVMC - lldbPluginProcessUtility - LINK_COMPONENTS - Support - ${LLVM_TARGETS_TO_BUILD} - ) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 185c9b63aada3..f50f60ec0023f 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -690,6 +690,12 @@ endif( LLVM_COMPILER_IS_GCC_COMPATIBLE OR CMAKE_CXX_COMPILER_ID MATCHES "XL" ) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") append("-Werror=unguarded-availability-new" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 21.0) + # LLVM has a policy of including virtual "anchor" functions to control + # where the vtable is emitted. In `final` classes, these are exactly what + # this warning detects: unnecessary virtual methods. + append("-Wno-unnecessary-virtual-specifier" CMAKE_CXX_FLAGS) + endif() endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND LLVM_ENABLE_LTO) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 806874fa76b2e..34a6bb8f13d6b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -6212,6 +6212,35 @@ following: DW_ATE_unsigned = 7 DW_ATE_unsigned_char = 8 +.. _DIFixedPointType: + +DIFixedPointType +"""""""""""""""" + +``DIFixedPointType`` nodes represent fixed-point types. A fixed-point +type is conceptually an integer with a scale factor. +``DIFixedPointType`` is derived from ``DIBasicType`` and inherits its +attributes. However, only certain encodings are accepted: + +.. 
code-block:: text + + DW_ATE_signed_fixed = 13 + DW_ATE_unsigned_fixed = 14 + +There are three kinds of fixed-point type: binary, where the scale +factor is a power of 2; decimal, where the scale factor is a power of +10; and rational, where the scale factor is an arbitrary rational +number. + +.. code-block:: text + + !0 = !DIFixedPointType(name: "decimal", size: 8, encoding: DW_ATE_signed_fixed, + kind: Decimal, factor: -4) + !1 = !DIFixedPointType(name: "binary", size: 8, encoding: DW_ATE_unsigned_fixed, + kind: Binary, factor: -16) + !2 = !DIFixedPointType(name: "rational", size: 8, encoding: DW_ATE_signed_fixed, + kind: Rational, numerator: 1234, denominator: 5678) + .. _DISubroutineType: DISubroutineType diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 30bbaa4d34161..9fbe31d2629bd 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -173,6 +173,7 @@ enum { LLVMDISubrangeMetadataKind, LLVMDIEnumeratorMetadataKind, LLVMDIBasicTypeMetadataKind, + LLVMDIFixedPointTypeMetadataKind, LLVMDIDerivedTypeMetadataKind, LLVMDICompositeTypeMetadataKind, LLVMDISubroutineTypeMetadataKind, diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index f0f992f8eac38..ea9ba6f47ac1a 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -14,6 +14,7 @@ #ifndef LLVM_ADT_DENSEMAP_H #define LLVM_ADT_DENSEMAP_H +#include "llvm/ADT/ADL.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/EpochTracker.h" #include "llvm/Support/AlignOf.h" @@ -302,6 +303,11 @@ class DenseMapBase : public DebugEpochBase { insert(*I); } + /// Inserts range of 'std::pair' values into the map. 
+ template void insert_range(Range &&R) { + insert(adl_begin(R), adl_end(R)); + } + template std::pair insert_or_assign(const KeyT &Key, V &&Val) { auto Ret = try_emplace(Key, std::forward(Val)); diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h index 8dffa1ecb5f38..b0bc0ff4e1d89 100644 --- a/llvm/include/llvm/Analysis/Lint.h +++ b/llvm/include/llvm/Analysis/Lint.h @@ -29,14 +29,20 @@ class Function; /// /// This should only be used for debugging, because it plays games with /// PassManagers and stuff. -void lintModule(const Module &M); +void lintModule(const Module &M, bool AbortOnError = false); // Lint a function. -void lintFunction(const Function &F); +void lintFunction(const Function &F, bool AbortOnError = false); class LintPass : public PassInfoMixin { + const bool AbortOnError; + public: + LintPass(bool AbortOnError) : AbortOnError(AbortOnError) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index cb6f47e3a76be..f715e0ec8dbb4 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -216,6 +216,21 @@ class MemoryDepChecker { return MaxSafeVectorWidthInBits; } + /// Return true if there are no store-load forwarding dependencies. + bool isSafeForAnyStoreLoadForwardDistances() const { + return MaxStoreLoadForwardSafeDistanceInBits == + std::numeric_limits::max(); + } + + /// Return safe power-of-2 number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits. 
+ uint64_t getStoreLoadForwardSafeDistanceInBits() const { + assert(!isSafeForAnyStoreLoadForwardDistances() && + "Expected the distance, that prevent store-load forwarding, to be " + "set."); + return MaxStoreLoadForwardSafeDistanceInBits; + } + /// In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. bool shouldRetryWithRuntimeCheck() const { @@ -304,6 +319,11 @@ class MemoryDepChecker { /// restrictive. uint64_t MaxSafeVectorWidthInBits = -1U; + /// Maximum power-of-2 number of elements, which do not prevent store-load + /// forwarding, multiplied by the size of the elements in bits. + uint64_t MaxStoreLoadForwardSafeDistanceInBits = + std::numeric_limits::max(); + /// If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. bool FoundNonConstantDistanceDependence = false; @@ -357,7 +377,8 @@ class MemoryDepChecker { /// /// \return false if we shouldn't vectorize at all or avoid larger /// vectorization factors by limiting MinDepDistBytes. - bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize); + bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize, + unsigned CommonStride = 0); /// Updates the current safety status with \p S. 
We can go from Safe to /// either PossiblySafeWithRtChecks or Unsafe and from diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 81b9929b1fab8..a8f9c71781701 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -494,6 +494,7 @@ enum Kind { DwarfCC, // DW_CC_foo EmissionKind, // lineTablesOnly NameTableKind, // GNU + FixedPointKind, // Fixed point DwarfOp, // DW_OP_foo DIFlag, // DIFlagFoo DISPFlag, // DISPFlagFoo diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index ec2535ac85966..92b6e68d9d0a7 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -386,6 +386,7 @@ enum MetadataCodes { METADATA_ARG_LIST = 46, // [n x [type num, value num]] METADATA_ASSIGN_ID = 47, // [distinct, ...] METADATA_SUBRANGE_TYPE = 48, // [distinct, ...] + METADATA_FIXED_POINT_TYPE = 49, // [distinct, ...] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 4dd45a1a7774d..16363fbaa4f9a 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -18,6 +18,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass { /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize = nullptr; + /// Provides the profile information for constants. + const StaticDataProfileInfo *SDPI = nullptr; + + /// The profile summary information. 
+ const ProfileSummaryInfo *PSI = nullptr; + /// Map a basic block section ID to the begin and end symbols of that section /// which determine the section's range. struct MBBSectionRange { @@ -330,6 +338,10 @@ class AsmPrinter : public MachineFunctionPass { DwarfUsesRelocationsAcrossSections = Enable; } + /// Returns a section suffix (hot or unlikely) for the constant if profiles + /// are available. Returns empty string otherwise. + StringRef getConstantSectionSuffix(const Constant *C) const; + //===------------------------------------------------------------------===// // XRay instrumentation implementation. //===------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 7c929262f6823..8b0e5798d1b61 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -66,6 +66,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { const Constant *C, Align &Alignment) const override; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. 
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, + const Constant *C, Align &Alignment, + StringRef SectionSuffix) const override; + MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; diff --git a/llvm/include/llvm/CodeGen/TileShapeInfo.h b/llvm/include/llvm/CodeGen/TileShapeInfo.h index 24f303a7d9d13..9cea327819895 100644 --- a/llvm/include/llvm/CodeGen/TileShapeInfo.h +++ b/llvm/include/llvm/CodeGen/TileShapeInfo.h @@ -48,8 +48,7 @@ class ShapeT { ColImm(InvalidImmShape) { assert(ShapesOperands.size() % 2 == 0 && "Miss row or col!"); - for (auto *Shape : ShapesOperands) - Shapes.push_back(Shape); + llvm::append_range(Shapes, ShapesOperands); if (MRI) deduceImm(MRI); diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index b63e564dfd36b..8e62b810ff147 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -215,6 +215,42 @@ namespace llvm { DINode::DIFlags Flags = DINode::FlagZero, uint32_t NumExtraInhabitants = 0); + /// Create debugging information entry for a binary fixed-point type. + /// \param Name Type name. + /// \param Encoding DWARF encoding code, either + /// dwarf::DW_ATE_signed_fixed or DW_ATE_unsigned_fixed. + /// \param Flags Optional DWARF attributes, e.g., DW_AT_endianity. + /// \param Factor Binary scale factor. + DIFixedPointType * + createBinaryFixedPointType(StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + DINode::DIFlags Flags, int Factor); + + /// Create debugging information entry for a decimal fixed-point type. + /// \param Name Type name. + /// \param Encoding DWARF encoding code, either + /// dwarf::DW_ATE_signed_fixed or DW_ATE_unsigned_fixed. + /// \param Flags Optional DWARF attributes, e.g., DW_AT_endianity. + /// \param Factor Decimal scale factor. 
+ DIFixedPointType * + createDecimalFixedPointType(StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + DINode::DIFlags Flags, int Factor); + + /// Create debugging information entry for an arbitrary rational + /// fixed-point type. + /// \param Name Type name. + /// \param Encoding DWARF encoding code, either + /// dwarf::DW_ATE_signed_fixed or DW_ATE_unsigned_fixed. + /// \param Flags Optional DWARF attributes, e.g., DW_AT_endianity. + /// \param Numerator Numerator of scale factor. + /// \param Denominator Denominator of scale factor. + DIFixedPointType * + createRationalFixedPointType(StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + DINode::DIFlags Flags, APInt Numerator, + APInt Denominator); + /// Create debugging information entry for a string /// type. /// \param Name Type name. diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 62a59ddaee599..174ff09f56bdf 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -199,6 +199,7 @@ class DINode : public MDNode { case DISubrangeKind: case DIEnumeratorKind: case DIBasicTypeKind: + case DIFixedPointTypeKind: case DIStringTypeKind: case DISubrangeTypeKind: case DIDerivedTypeKind: @@ -547,6 +548,7 @@ class DIScope : public DINode { default: return false; case DIBasicTypeKind: + case DIFixedPointTypeKind: case DIStringTypeKind: case DISubrangeTypeKind: case DIDerivedTypeKind: @@ -806,6 +808,7 @@ class DIType : public DIScope { default: return false; case DIBasicTypeKind: + case DIFixedPointTypeKind: case DIStringTypeKind: case DISubrangeTypeKind: case DIDerivedTypeKind: @@ -826,6 +829,7 @@ class DIBasicType : public DIType { unsigned Encoding; +protected: DIBasicType(LLVMContext &C, StorageType Storage, unsigned Tag, uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, uint32_t NumExtraInhabitants, DIFlags Flags, @@ -833,6 +837,13 @@ class 
DIBasicType : public DIType { : DIType(C, DIBasicTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0, NumExtraInhabitants, Flags, Ops), Encoding(Encoding) {} + DIBasicType(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag, + uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + uint32_t NumExtraInhabitants, DIFlags Flags, + ArrayRef Ops) + : DIType(C, ID, Storage, Tag, 0, SizeInBits, AlignInBits, 0, + NumExtraInhabitants, Flags, Ops), + Encoding(Encoding) {} ~DIBasicType() = default; static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag, @@ -897,7 +908,132 @@ class DIBasicType : public DIType { std::optional getSignedness() const; static bool classof(const Metadata *MD) { - return MD->getMetadataID() == DIBasicTypeKind; + return MD->getMetadataID() == DIBasicTypeKind || + MD->getMetadataID() == DIFixedPointTypeKind; + } +}; + +/// Fixed-point type. +class DIFixedPointType : public DIBasicType { + friend class LLVMContextImpl; + friend class MDNode; + + // Actually FixedPointKind. + unsigned Kind; + // Used for binary and decimal. + int Factor; + // Used for rational. 
+ APInt Numerator; + APInt Denominator; + + DIFixedPointType(LLVMContext &C, StorageType Storage, unsigned Tag, + uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + DIFlags Flags, unsigned Kind, int Factor, + ArrayRef Ops) + : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, SizeInBits, + AlignInBits, Encoding, 0, Flags, Ops), + Kind(Kind), Factor(Factor) { + assert(Kind == FixedPointBinary || Kind == FixedPointDecimal); + } + DIFixedPointType(LLVMContext &C, StorageType Storage, unsigned Tag, + uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + DIFlags Flags, unsigned Kind, APInt Numerator, + APInt Denominator, ArrayRef Ops) + : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, SizeInBits, + AlignInBits, Encoding, 0, Flags, Ops), + Kind(Kind), Factor(0), Numerator(Numerator), Denominator(Denominator) { + assert(Kind == FixedPointRational); + } + DIFixedPointType(LLVMContext &C, StorageType Storage, unsigned Tag, + uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, + APInt Denominator, ArrayRef Ops) + : DIBasicType(C, DIFixedPointTypeKind, Storage, Tag, SizeInBits, + AlignInBits, Encoding, 0, Flags, Ops), + Kind(Kind), Factor(Factor), Numerator(Numerator), + Denominator(Denominator) {} + ~DIFixedPointType() = default; + + static DIFixedPointType * + getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, + uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + DIFlags Flags, unsigned Kind, int Factor, APInt Numerator, + APInt Denominator, StorageType Storage, bool ShouldCreate = true) { + return getImpl(Context, Tag, getCanonicalMDString(Context, Name), + SizeInBits, AlignInBits, Encoding, Flags, Kind, Factor, + Numerator, Denominator, Storage, ShouldCreate); + } + static DIFixedPointType * + getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, + uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + DIFlags Flags, unsigned Kind, int 
Factor, APInt Numerator, + APInt Denominator, StorageType Storage, bool ShouldCreate = true); + + TempDIFixedPointType cloneImpl() const { + return getTemporary(getContext(), getTag(), getName(), getSizeInBits(), + getAlignInBits(), getEncoding(), getFlags(), Kind, + Factor, Numerator, Denominator); + } + +public: + enum FixedPointKind : unsigned { + /// Scale factor 2^Factor. + FixedPointBinary, + /// Scale factor 10^Factor. + FixedPointDecimal, + /// Arbitrary rational scale factor. + FixedPointRational, + LastFixedPointKind = FixedPointRational, + }; + + static std::optional getFixedPointKind(StringRef Str); + static const char *fixedPointKindString(FixedPointKind); + + DEFINE_MDNODE_GET(DIFixedPointType, + (unsigned Tag, MDString *Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, + unsigned Kind, int Factor, APInt Numerator, + APInt Denominator), + (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags, Kind, + Factor, Numerator, Denominator)) + DEFINE_MDNODE_GET(DIFixedPointType, + (unsigned Tag, StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, DIFlags Flags, + unsigned Kind, int Factor, APInt Numerator, + APInt Denominator), + (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags, Kind, + Factor, Numerator, Denominator)) + + TempDIFixedPointType clone() const { return cloneImpl(); } + + bool isBinary() const { return Kind == FixedPointBinary; } + bool isDecimal() const { return Kind == FixedPointDecimal; } + bool isRational() const { return Kind == FixedPointRational; } + + bool isSigned() const; + + FixedPointKind getKind() const { return static_cast(Kind); } + + int getFactorRaw() const { return Factor; } + int getFactor() const { + assert(Kind == FixedPointBinary || Kind == FixedPointDecimal); + return Factor; + } + + const APInt &getNumeratorRaw() const { return Numerator; } + const APInt &getNumerator() const { + assert(Kind == FixedPointRational); + return Numerator; + } + + const APInt 
&getDenominatorRaw() const { return Denominator; } + const APInt &getDenominator() const { + assert(Kind == FixedPointRational); + return Denominator; + } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIFixedPointTypeKind; } }; diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index 7cb257fefbc38..511bf48707f00 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -119,6 +119,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICommonBlock) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIStringType) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGenericSubrange) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubrangeType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIFixedPointType) #undef HANDLE_METADATA #undef HANDLE_METADATA_LEAF diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index d7beebf614516..3134ee02f54be 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -650,9 +650,6 @@ class MCAsmInfo { bool doDwarfFDESymbolsUseAbsDiff() const { return DwarfFDESymbolsUseAbsDiff; } bool useDwarfRegNumForCFI() const { return DwarfRegNumForCFI; } bool useParensForSymbolVariant() const { return UseParensForSymbolVariant; } - bool useParensForDollarSignNames() const { - return UseParensForDollarSignNames; - } bool supportsExtendedDwarfLocDirective() const { return SupportsExtendedDwarfLocDirective; } diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index 5bfbd2d9f8e71..12830ee648ae0 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -82,7 +82,7 @@ class MCExpr { /// @{ void print(raw_ostream &OS, const MCAsmInfo *MAI, - bool InParens = false) const; + int SurroundingPrec = 0) const; void dump() const; /// Returns whether the given symbol is used anywhere in the expression or @@ -218,14 +218,6 @@ class MCSymbolRefExpr : public MCExpr { VK_WASM_GOT_TLS, // Wasm global 
index of TLS symbol. VK_WASM_FUNCINDEX, // Wasm function index. - VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo - VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi - VK_AMDGPU_REL32_LO, // symbol@rel32@lo - VK_AMDGPU_REL32_HI, // symbol@rel32@hi - VK_AMDGPU_REL64, // symbol@rel64 - VK_AMDGPU_ABS32_LO, // symbol@abs32@lo - VK_AMDGPU_ABS32_HI, // symbol@abs32@hi - FirstTargetSpecifier, }; diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 9fc09bb7db6c2..47617424a9688 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { SectionKind Kind, const Constant *C, Align &Alignment) const; + /// Similar to the function above, but append \p SectionSuffix to the section + /// name. + virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, const Constant *C, + Align &Alignment, + StringRef SectionSuffix) const; + virtual MCSection * getSectionForMachineBasicBlock(const Function &F, const MachineBasicBlock &MBB, diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index c3a04f9373dbe..d654ac3ec9273 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -382,7 +382,8 @@ class LoopVectorizationLegality { const LoopAccessInfo *getLAI() const { return LAI; } bool isSafeForAnyVectorWidth() const { - return LAI->getDepChecker().isSafeForAnyVectorWidth(); + return LAI->getDepChecker().isSafeForAnyVectorWidth() && + LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances(); } uint64_t getMaxSafeVectorWidthInBits() const { @@ -406,6 +407,17 @@ class LoopVectorizationLegality { return hasUncountableEarlyExit() ? 
getUncountableEdge()->second : nullptr; } + /// Return true if there is store-load forwarding dependencies. + bool isSafeForAnyStoreLoadForwardDistances() const { + return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances(); + } + + /// Return safe power-of-2 number of elements, which do not prevent store-load + /// forwarding and safe to operate simultaneously. + uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const { + return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits(); + } + /// Returns true if vector representation of the instruction \p I /// requires mask. bool isMaskRequired(const Instruction *I) const { diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index 4ea840c099e70..f74dcecb7e3e6 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -43,8 +43,7 @@ class ShuffleMask { static ShuffleMask getIdentity(unsigned Sz) { IndicesVecT Indices; Indices.reserve(Sz); - for (auto Idx : seq(0, (int)Sz)) - Indices.push_back(Idx); + llvm::append_range(Indices, seq(0, (int)Sz)); return ShuffleMask(std::move(Indices)); } /// \Returns true if the mask is a perfect identity mask with consecutive diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index e42b2bd82cf2e..9f193b610328b 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -3295,9 +3295,12 @@ InlineCostAnnotationPrinterPass::run(Function &F, [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - Module *M = F.getParent(); - ProfileSummaryInfo PSI(*M); - TargetTransformInfo TTI(M->getDataLayout()); + + auto &MAMProxy = FAM.getResult(F); + ProfileSummaryInfo *PSI = + MAMProxy.getCachedResult(*F.getParent()); + const TargetTransformInfo &TTI = FAM.getResult(F); + // FIXME: Redesign the usage of InlineParams 
to expand the scope of this pass. // In the current implementation, the type of InlineParams doesn't matter as // the pass serves only for verification of inliner's decisions. @@ -3312,7 +3315,7 @@ InlineCostAnnotationPrinterPass::run(Function &F, continue; OptimizationRemarkEmitter ORE(CalledFunction); InlineCostCallAnalyzer ICCA(*CalledFunction, *CB, Params, TTI, - GetAssumptionCache, nullptr, nullptr, &PSI, + GetAssumptionCache, nullptr, nullptr, PSI, &ORE); ICCA.analyze(); OS << " Analyzing call of " << CalledFunction->getName() diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index a01672844e0ec..f05e36e2025d4 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -78,11 +78,6 @@ using namespace llvm; -static const char LintAbortOnErrorArgName[] = "lint-abort-on-error"; -static cl::opt - LintAbortOnError(LintAbortOnErrorArgName, cl::init(false), - cl::desc("In the Lint pass, abort on errors.")); - namespace { namespace MemRef { static const unsigned Read = 1; @@ -747,20 +742,26 @@ PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { Lint L(Mod, DL, AA, AC, DT, TLI); L.visit(F); dbgs() << L.MessagesStr.str(); - if (LintAbortOnError && !L.MessagesStr.str().empty()) - report_fatal_error(Twine("Linter found errors, aborting. (enabled by --") + - LintAbortOnErrorArgName + ")", - false); + if (AbortOnError && !L.MessagesStr.str().empty()) + report_fatal_error( + "linter found errors, aborting. (enabled by abort-on-error)", false); return PreservedAnalyses::all(); } +void LintPass::printPipeline( + raw_ostream &OS, function_ref MapClassName2PassName) { + PassInfoMixin::printPipeline(OS, MapClassName2PassName); + if (AbortOnError) + OS << ""; +} + //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... 
//===----------------------------------------------------------------------===// /// lintFunction - Check a function for errors, printing messages on stderr. /// -void llvm::lintFunction(const Function &f) { +void llvm::lintFunction(const Function &f, bool AbortOnError) { Function &F = const_cast(f); assert(!F.isDeclaration() && "Cannot lint external functions"); @@ -775,14 +776,14 @@ void llvm::lintFunction(const Function &f) { AA.registerFunctionAnalysis(); return AA; }); - LintPass().run(F, FAM); + LintPass(AbortOnError).run(F, FAM); } /// lintModule - Check a module for errors, printing messages on stderr. /// -void llvm::lintModule(const Module &M) { +void llvm::lintModule(const Module &M, bool AbortOnError) { for (const Function &F : M) { if (!F.isDeclaration()) - lintFunction(F); + lintFunction(F, AbortOnError); } } diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 7f1b5dc3890a9..dd7b796fd0fdf 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1740,7 +1740,8 @@ bool MemoryDepChecker::Dependence::isForward() const { } bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, - uint64_t TypeByteSize) { + uint64_t TypeByteSize, + unsigned CommonStride) { // If loads occur at a distance that is not a multiple of a feasible vector // factor store-load forwarding does not take place. // Positive dependences might cause troubles because vectorizing them might @@ -1755,31 +1756,38 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, // cause any slowdowns. const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; // Maximum vector factor. 
- uint64_t MaxVFWithoutSLForwardIssues = std::min( - VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes); + uint64_t MaxVFWithoutSLForwardIssuesPowerOf2 = + std::min(VectorizerParams::MaxVectorWidth * TypeByteSize, + MaxStoreLoadForwardSafeDistanceInBits); // Compute the smallest VF at which the store and load would be misaligned. - for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; - VF *= 2) { + for (uint64_t VF = 2 * TypeByteSize; + VF <= MaxVFWithoutSLForwardIssuesPowerOf2; VF *= 2) { // If the number of vector iteration between the store and the load are // small we could incur conflicts. if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) { - MaxVFWithoutSLForwardIssues = (VF >> 1); + MaxVFWithoutSLForwardIssuesPowerOf2 = (VF >> 1); break; } } - if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { + if (MaxVFWithoutSLForwardIssuesPowerOf2 < 2 * TypeByteSize) { LLVM_DEBUG( dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n"); return true; } - if (MaxVFWithoutSLForwardIssues < MinDepDistBytes && - MaxVFWithoutSLForwardIssues != - VectorizerParams::MaxVectorWidth * TypeByteSize) - MinDepDistBytes = MaxVFWithoutSLForwardIssues; + if (CommonStride && + MaxVFWithoutSLForwardIssuesPowerOf2 < + MaxStoreLoadForwardSafeDistanceInBits && + MaxVFWithoutSLForwardIssuesPowerOf2 != + VectorizerParams::MaxVectorWidth * TypeByteSize) { + uint64_t MaxVF = MaxVFWithoutSLForwardIssuesPowerOf2 / CommonStride; + uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; + MaxStoreLoadForwardSafeDistanceInBits = + std::min(MaxStoreLoadForwardSafeDistanceInBits, MaxVFInBits); + } return false; } @@ -2227,20 +2235,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::min(static_cast(MinDistance), MinDepDistBytes); bool IsTrueDataDependence = (!AIsWrite && BIsWrite); - uint64_t MinDepDistBytesOld = MinDepDistBytes; if (IsTrueDataDependence && 
EnableForwardingConflictDetection && ConstDist && - couldPreventStoreLoadForward(MinDistance, TypeByteSize)) { - // Sanity check that we didn't update MinDepDistBytes when calling - // couldPreventStoreLoadForward - assert(MinDepDistBytes == MinDepDistBytesOld && - "An update to MinDepDistBytes requires an update to " - "MaxSafeVectorWidthInBits"); - (void)MinDepDistBytesOld; + couldPreventStoreLoadForward(MinDistance, TypeByteSize, *CommonStride)) return Dependence::BackwardVectorizableButPreventsForwarding; - } - // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits - // since there is a backwards dependency. uint64_t MaxVF = MinDepDistBytes / *CommonStride; LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance << " with max VF = " << MaxVF << '\n'); @@ -3005,6 +3003,11 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (!DC.isSafeForAnyVectorWidth()) OS << " with a maximum safe vector width of " << DC.getMaxSafeVectorWidthInBits() << " bits"; + if (!DC.isSafeForAnyStoreLoadForwardDistances()) { + uint64_t SLDist = DC.getStoreLoadForwardSafeDistanceInBits(); + OS << ", with a maximum safe store-load forward width of " << SLDist + << " bits"; + } if (PtrRtChecking->Need) OS << " with run-time checks"; OS << "\n"; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index fd0a50d25e714..4d25b12c9ab06 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -1024,6 +1024,11 @@ lltok::Kind LLLexer::LexIdentifier() { return lltok::NameTableKind; } + if (Keyword == "Binary" || Keyword == "Decimal" || Keyword == "Rational") { + StrVal.assign(Keyword.begin(), Keyword.end()); + return lltok::FixedPointKind; + } + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by // the CFE to avoid forcing it to deal with 64-bit numbers. 
if ((TokStart[0] == 'u' || TokStart[0] == 's') && diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 960119bab0933..b7ebffbeb7187 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -4751,6 +4751,11 @@ struct EmissionKindField : public MDUnsignedField { EmissionKindField() : MDUnsignedField(0, DICompileUnit::LastEmissionKind) {} }; +struct FixedPointKindField : public MDUnsignedField { + FixedPointKindField() + : MDUnsignedField(0, DIFixedPointType::LastFixedPointKind) {} +}; + struct NameTableKindField : public MDUnsignedField { NameTableKindField() : MDUnsignedField( @@ -4994,6 +4999,25 @@ bool LLParser::parseMDField(LocTy Loc, StringRef Name, return false; } +template <> +bool LLParser::parseMDField(LocTy Loc, StringRef Name, + FixedPointKindField &Result) { + if (Lex.getKind() == lltok::APSInt) + return parseMDField(Loc, Name, static_cast(Result)); + + if (Lex.getKind() != lltok::FixedPointKind) + return tokError("expected fixed-point kind"); + + auto Kind = DIFixedPointType::getFixedPointKind(Lex.getStrVal()); + if (!Kind) + return tokError("invalid fixed-point kind" + Twine(" '") + Lex.getStrVal() + + "'"); + assert(*Kind <= Result.Max && "Expected valid fixed-point kind"); + Result.assign(*Kind); + Lex.Lex(); + return false; +} + template <> bool LLParser::parseMDField(LocTy Loc, StringRef Name, NameTableKindField &Result) { @@ -5516,6 +5540,33 @@ bool LLParser::parseDIBasicType(MDNode *&Result, bool IsDistinct) { return false; } +/// parseDIFixedPointType: +/// ::= !DIFixedPointType(tag: DW_TAG_base_type, name: "xyz", size: 32, +/// align: 32, encoding: DW_ATE_signed_fixed, +/// flags: 0, kind: Rational, factor: 3, numerator: 1, +/// denominator: 8) +bool LLParser::parseDIFixedPointType(MDNode *&Result, bool IsDistinct) { +#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \ + OPTIONAL(name, MDStringField, ); \ + OPTIONAL(size, 
MDUnsignedField, (0, UINT64_MAX)); \ + OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \ + OPTIONAL(encoding, DwarfAttEncodingField, ); \ + OPTIONAL(flags, DIFlagField, ); \ + OPTIONAL(kind, FixedPointKindField, ); \ + OPTIONAL(factor, MDSignedField, ); \ + OPTIONAL(numerator, MDAPSIntField, ); \ + OPTIONAL(denominator, MDAPSIntField, ); + PARSE_MD_FIELDS(); +#undef VISIT_MD_FIELDS + + Result = GET_OR_DISTINCT(DIFixedPointType, + (Context, tag.Val, name.Val, size.Val, align.Val, + encoding.Val, flags.Val, kind.Val, factor.Val, + numerator.Val, denominator.Val)); + return false; +} + /// parseDIStringType: /// ::= !DIStringType(name: "character(4)", size: 32, align: 32) bool LLParser::parseDIStringType(MDNode *&Result, bool IsDistinct) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 40e755902b724..b0d9bcc384101 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3339,10 +3339,8 @@ Error BitcodeReader::parseConstants() { if (Record.empty()) return error("Invalid aggregate record"); - unsigned Size = Record.size(); SmallVector Elts; - for (unsigned i = 0; i != Size; ++i) - Elts.push_back(Record[i]); + llvm::append_range(Elts, Record); if (isa(CurTy)) { V = BitcodeConstant::create( diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index e87e5bde63d82..4879569200549 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -1542,6 +1542,39 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( NextMetadataNo++; break; } + case bitc::METADATA_FIXED_POINT_TYPE: { + if (Record.size() < 11) + return error("Invalid record"); + + IsDistinct = Record[0]; + DINode::DIFlags Flags = static_cast(Record[6]); + + size_t Offset = 9; + + auto ReadWideInt = [&]() { + uint64_t Encoded = Record[Offset++]; + unsigned NumWords = Encoded >> 32; + unsigned BitWidth = 
Encoded & 0xffffffff; + auto Value = readWideAPInt(ArrayRef(&Record[Offset], NumWords), BitWidth); + Offset += NumWords; + return Value; + }; + + APInt Numerator = ReadWideInt(); + APInt Denominator = ReadWideInt(); + + if (Offset != Record.size()) + return error("Invalid record"); + + MetadataList.assignValue( + GET_OR_DISTINCT(DIFixedPointType, + (Context, Record[1], getMDString(Record[2]), Record[3], + Record[4], Record[5], Flags, Record[7], Record[8], + Numerator, Denominator)), + NextMetadataNo); + NextMetadataNo++; + break; + } case bitc::METADATA_STRING_TYPE: { if (Record.size() > 9 || Record.size() < 8) return error("Invalid record"); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 34ba25dccc368..4a0db9d76f44a 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -323,6 +323,9 @@ class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { SmallVectorImpl &Record, unsigned Abbrev); void writeDIBasicType(const DIBasicType *N, SmallVectorImpl &Record, unsigned Abbrev); + void writeDIFixedPointType(const DIFixedPointType *N, + SmallVectorImpl &Record, + unsigned Abbrev); void writeDIStringType(const DIStringType *N, SmallVectorImpl &Record, unsigned Abbrev); void writeDIDerivedType(const DIDerivedType *N, @@ -1209,8 +1212,7 @@ void ModuleBitcodeWriter::writeTypeTable() { TypeVals.push_back(TET->getNumTypeParameters()); for (Type *InnerTy : TET->type_params()) TypeVals.push_back(VE.getTypeID(InnerTy)); - for (unsigned IntParam : TET->int_params()) - TypeVals.push_back(IntParam); + llvm::append_range(TypeVals, TET->int_params()); break; } case Type::TypedPointerTyID: @@ -1888,6 +1890,35 @@ void ModuleBitcodeWriter::writeDIBasicType(const DIBasicType *N, Record.clear(); } +void ModuleBitcodeWriter::writeDIFixedPointType( + const DIFixedPointType *N, SmallVectorImpl &Record, + unsigned Abbrev) { + Record.push_back(N->isDistinct()); + 
Record.push_back(N->getTag()); + Record.push_back(VE.getMetadataOrNullID(N->getRawName())); + Record.push_back(N->getSizeInBits()); + Record.push_back(N->getAlignInBits()); + Record.push_back(N->getEncoding()); + Record.push_back(N->getFlags()); + Record.push_back(N->getKind()); + Record.push_back(N->getFactorRaw()); + + auto WriteWideInt = [&](const APInt &Value) { + // Write an encoded word that holds the number of active words and + // the number of bits. + uint64_t NumWords = Value.getActiveWords(); + uint64_t Encoded = (NumWords << 32) | Value.getBitWidth(); + Record.push_back(Encoded); + emitWideAPInt(Record, Value); + }; + + WriteWideInt(N->getNumeratorRaw()); + WriteWideInt(N->getDenominatorRaw()); + + Stream.EmitRecord(bitc::METADATA_FIXED_POINT_TYPE, Record, Abbrev); + Record.clear(); +} + void ModuleBitcodeWriter::writeDIStringType(const DIStringType *N, SmallVectorImpl &Record, unsigned Abbrev) { @@ -4303,10 +4334,8 @@ static void writeFunctionHeapProfileRecords( } for (auto Id : CI.StackIdIndices) Record.push_back(GetStackIndex(Id)); - if (!PerModule) { - for (auto V : CI.Clones) - Record.push_back(V); - } + if (!PerModule) + llvm::append_range(Record, CI.Clones); Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_CALLSITE_INFO : bitc::FS_COMBINED_CALLSITE_INFO, Record, CallsiteAbbrev); @@ -4326,10 +4355,8 @@ static void writeFunctionHeapProfileRecords( assert(CallStackCount <= CallStackPos.size()); Record.push_back(CallStackPos[CallStackCount++]); } - if (!PerModule) { - for (auto V : AI.Versions) - Record.push_back(V); - } + if (!PerModule) + llvm::append_range(Record, AI.Versions); assert(AI.ContextSizeInfos.empty() || AI.ContextSizeInfos.size() == AI.MIBs.size()); // Optionally emit the context size information if it exists. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c626202753824..2d76aa5488333 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2769,6 +2769,13 @@ namespace { } // end anonymous namespace +StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const { + if (TM.Options.EnableStaticDataPartitioning && C && SDPI && PSI) + return SDPI->getConstantSectionPrefix(C, PSI); + + return ""; +} + /// EmitConstantPool - Print to the current output stream assembly /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by @@ -2792,7 +2799,7 @@ void AsmPrinter::emitConstantPool() { C = CPE.Val.ConstVal; MCSection *S = getObjFileLowering().getSectionForConstant( - getDataLayout(), Kind, C, Alignment); + getDataLayout(), Kind, C, Alignment, getConstantSectionSuffix(C)); // The number of sections are small, just do a linear search from the // last section to the first. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 081828ea358af..2723b1f55ccaa 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -615,7 +615,9 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, return &TyDIE; } construct(CTy); - } else if (auto *BT = dyn_cast(Ty)) + } else if (auto *FPT = dyn_cast(Ty)) + construct(FPT); + else if (auto *BT = dyn_cast(Ty)) construct(BT); else if (auto *ST = dyn_cast(Ty)) construct(ST); @@ -760,6 +762,30 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { NumExtraInhabitants); } +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIFixedPointType *BTy) { + // Base type handling. 
+ constructTypeDIE(Buffer, static_cast(BTy)); + + if (BTy->isBinary()) + addSInt(Buffer, dwarf::DW_AT_binary_scale, dwarf::DW_FORM_sdata, + BTy->getFactor()); + else if (BTy->isDecimal()) + addSInt(Buffer, dwarf::DW_AT_decimal_scale, dwarf::DW_FORM_sdata, + BTy->getFactor()); + else { + assert(BTy->isRational()); + DIE *ContextDIE = getOrCreateContextDIE(BTy->getScope()); + DIE &Constant = createAndAddDIE(dwarf::DW_TAG_constant, *ContextDIE); + + addInt(Constant, dwarf::DW_AT_GNU_numerator, BTy->getNumerator(), + !BTy->isSigned()); + addInt(Constant, dwarf::DW_AT_GNU_denominator, BTy->getDenominator(), + !BTy->isSigned()); + + addDIEEntry(Buffer, dwarf::DW_AT_small, Constant); + } +} + void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) { // Get core information. StringRef Name = STy->getName(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 5b0da7b09d31c..055d7173daec5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -343,6 +343,7 @@ class DwarfUnit : public DIEUnit { void addIntAsBlock(DIE &Die, dwarf::Attribute Attribute, const APInt &Val); void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy); + void constructTypeDIE(DIE &Buffer, const DIFixedPointType *BTy); void constructTypeDIE(DIE &Buffer, const DIStringType *BTy); void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4487b9d510cc7..dc5c5f38e3bd8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25532,26 +25532,24 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // Handle only simple case where vector being inserted and vector // being extracted are of same size. 
EVT SmallVT = V.getOperand(1).getValueType(); - if (!NVT.bitsEq(SmallVT)) - return SDValue(); - - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indices are equal or bit offsets are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - uint64_t InsIdx = V.getConstantOperandVal(2); - if (InsIdx * SmallVT.getScalarSizeInBits() == - ExtIdx * NVT.getScalarSizeInBits()) { - if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) - return SDValue(); - - return DAG.getBitcast(NVT, V.getOperand(1)); + if (NVT.bitsEq(SmallVT)) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal or bit offsets are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + uint64_t InsIdx = V.getConstantOperandVal(2); + if (InsIdx * SmallVT.getScalarSizeInBits() == + ExtIdx * NVT.getScalarSizeInBits()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT)) + return DAG.getBitcast(NVT, V.getOperand(1)); + } else { + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, NVT, + DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), + N->getOperand(1)); + } } - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, NVT, - DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), - N->getOperand(1)); } if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations)) diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index e823df3186a54..eb07e5d2bae4b 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -625,18 +626,11 @@ bool InsertStackProtectors(const 
TargetMachine *TM, Function *F, HasIRCheck = true; // If we're instrumenting a block with a tail call, the check has to be - // inserted before the call rather than between it and the return. The - // verifier guarantees that a tail call is either directly before the - // return or with a single correct bitcast of the return value in between so - // we don't need to worry about many situations here. + // inserted before the call rather than between it and the return. Instruction *Prev = CheckLoc->getPrevNonDebugInstruction(); - if (Prev && isa(Prev) && cast(Prev)->isTailCall()) - CheckLoc = Prev; - else if (Prev) { - Prev = Prev->getPrevNonDebugInstruction(); - if (Prev && isa(Prev) && cast(Prev)->isTailCall()) + if (auto *CI = dyn_cast_if_present(Prev)) + if (CI->isTailCall() && isInTailCallPosition(*CI, *TM)) CheckLoc = Prev; - } // Generate epilogue instrumentation. The epilogue intrumentation can be // function-based or inlined depending on which mechanism the target is diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 60501b4495082..8e12c5e5439ba 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -10,7 +10,7 @@ // for the following types of static data: // - Jump tables // - Module-internal global variables -// - Constant pools (TODO) +// - Constant pools // // For the original RFC of this pass please see // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744 @@ -60,8 +60,8 @@ class StaticDataSplitter : public MachineFunctionPass { // Returns the constant if the operand refers to a global variable or constant // that gets lowered to static data sections. Otherwise, return nullptr. - const Constant *getConstant(const MachineOperand &Op, - const TargetMachine &TM); + const Constant *getConstant(const MachineOperand &Op, const TargetMachine &TM, + const MachineConstantPool *MCP); // Use profiles to partition static data. 
bool partitionStaticDataWithProfiles(MachineFunction &MF); @@ -89,8 +89,11 @@ class StaticDataSplitter : public MachineFunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); - // This pass does not modify the CFG. - AU.setPreservesCFG(); + // This pass does not modify any required analysis results except + // StaticDataProfileInfoWrapperPass, but StaticDataProfileInfoWrapperPass + // is made an immutable pass that it won't be re-scheduled by pass manager + // anyway. So mark setPreservesAll() here for faster compile time. + AU.setPreservesAll(); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -119,40 +122,63 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { return Changed; } -const Constant *StaticDataSplitter::getConstant(const MachineOperand &Op, - const TargetMachine &TM) { - if (!Op.isGlobal()) +const Constant * +StaticDataSplitter::getConstant(const MachineOperand &Op, + const TargetMachine &TM, + const MachineConstantPool *MCP) { + if (!Op.isGlobal() && !Op.isCPI()) return nullptr; - // Find global variables with local linkage. - const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); - // Skip 'llvm.'-prefixed global variables conservatively because they are - // often handled specially, and skip those not in static data sections. - if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(*GV, TM)) + if (Op.isGlobal()) { + // Find global variables with local linkage. + const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); + // Skip 'llvm.'-prefixed global variables conservatively because they are + // often handled specially, and skip those not in static data + // sections. 
+ if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(*GV, TM)) + return nullptr; + return GV; + } + assert(Op.isCPI() && "Op must be constant pool index in this branch"); + int CPI = Op.getIndex(); + if (CPI == -1) + return nullptr; + + assert(MCP != nullptr && "Constant pool info is not available."); + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + + if (CPE.isMachineConstantPoolEntry()) return nullptr; - return GV; + + return CPE.Val.ConstVal; } bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { - int NumChangedJumpTables = 0; + // If any of the static data (jump tables, global variables, constant pools) + // are captured by the analysis, set `Changed` to true. Note this pass won't + // invalidate any analysis pass (see `getAnalysisUsage` above), so the main + // purpose of tracking and conveying the change (to pass manager) is + // informative as opposed to invalidating any analysis results. As an example + // of where this information is useful, `PMDataManager::dumpPassInfo` will + // only dump pass info if a local change happens, otherwise a pass appears as + // "skipped". + bool Changed = false; - const TargetMachine &TM = MF.getTarget(); MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); // Jump table could be used by either terminating instructions or // non-terminating ones, so we walk all instructions and use // `MachineOperand::isJTI()` to identify jump table operands. - // Similarly, `MachineOperand::isCPI()` can identify constant pool usages - // in the same loop. + // Similarly, `MachineOperand::isCPI()` is used to identify constant pool + // usages in the same loop. 
for (const auto &MBB : MF) { + std::optional Count = MBFI->getBlockProfileCount(&MBB); for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { - if (!Op.isJTI() && !Op.isGlobal()) + if (!Op.isJTI() && !Op.isGlobal() && !Op.isCPI()) continue; - std::optional Count = MBFI->getBlockProfileCount(&MBB); - if (Op.isJTI()) { assert(MJTI != nullptr && "Jump table info is not available."); const int JTI = Op.getIndex(); @@ -168,15 +194,16 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { if (Count && PSI->isColdCount(*Count)) Hotness = MachineFunctionDataHotness::Cold; - if (MJTI->updateJumpTableEntryHotness(JTI, Hotness)) - ++NumChangedJumpTables; - } else if (const Constant *C = getConstant(Op, TM)) { + Changed |= MJTI->updateJumpTableEntryHotness(JTI, Hotness); + } else if (const Constant *C = + getConstant(Op, MF.getTarget(), MF.getConstantPool())) { SDPI->addConstantProfileCount(C, Count); + Changed = true; } } } } - return NumChangedJumpTables > 0; + return Changed; } const GlobalVariable * @@ -218,7 +245,8 @@ void StaticDataSplitter::annotateStaticDataWithoutProfiles( for (const auto &MBB : MF) for (const MachineInstr &I : MBB) for (const MachineOperand &Op : I.operands()) - if (const Constant *C = getConstant(Op, MF.getTarget())) + if (const Constant *C = + getConstant(Op, MF.getTarget(), MF.getConstantPool())) SDPI->addConstantProfileCount(C, std::nullopt); } diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index dd6d85e3662db..4c20c5dc74d9a 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1068,6 +1068,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( return DataRelROSection; } +MCSection *TargetLoweringObjectFileELF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, + StringRef SectionSuffix) 
const { + // TODO: Share code between this function and + // MCObjectInfo::initELFMCObjectFileInfo. + if (SectionSuffix.empty()) + return getSectionForConstant(DL, Kind, C, Alignment); + + auto &Context = getContext(); + if (Kind.isMergeableConst4() && MergeableConst4Section) + return Context.getELFSection(".rodata.cst4." + SectionSuffix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 4); + if (Kind.isMergeableConst8() && MergeableConst8Section) + return Context.getELFSection(".rodata.cst8." + SectionSuffix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 8); + if (Kind.isMergeableConst16() && MergeableConst16Section) + return Context.getELFSection(".rodata.cst16." + SectionSuffix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 16); + if (Kind.isMergeableConst32() && MergeableConst32Section) + return Context.getELFSection(".rodata.cst32." + SectionSuffix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_MERGE, 32); + if (Kind.isReadOnly()) + return Context.getELFSection(".rodata." + SectionSuffix, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return Context.getELFSection(".data.rel.ro." + SectionSuffix, + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); +} + /// Returns a unique section for the given machine basic block. 
MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock( const Function &F, const MachineBasicBlock &MBB, diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVOptions.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVOptions.cpp index c8789cb959fb7..8050c0efdd7cb 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVOptions.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVOptions.cpp @@ -445,8 +445,7 @@ void LVPatterns::addGenericPatterns(StringSet<> &Patterns) { } void LVPatterns::addOffsetPatterns(const LVOffsetSet &Patterns) { - for (const LVOffset &Entry : Patterns) - OffsetMatchInfo.push_back(Entry); + llvm::append_range(OffsetMatchInfo, Patterns); if (OffsetMatchInfo.size()) { options().setSelectOffsetPattern(); options().setSelectExecute(); diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp index 4608fe20cb6df..44d073387206e 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp @@ -182,8 +182,7 @@ void LVSymbol::getLocations(LVLocations &LocationList) const { if (!Locations) return; - for (LVLocation *Location : *Locations) - LocationList.push_back(Location); + llvm::append_range(LocationList, *Locations); } // Calculate coverage factor. diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp index 8074f1a9fddfb..e5895516b5e77 100644 --- a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp @@ -163,8 +163,7 @@ void LVCodeViewReader::cacheRelocations() { const coff_section *CoffSection = getObj().getCOFFSection(Section); auto &RM = RelocMap[CoffSection]; - for (const RelocationRef &Relocacion : Section.relocations()) - RM.push_back(Relocacion); + llvm::append_range(RM, Section.relocations()); // Sort relocations by address. 
llvm::sort(RM, [](RelocationRef L, RelocationRef R) { diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp index 72e5f701f89a7..abe9ae7a3da16 100644 --- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp @@ -70,8 +70,7 @@ Error EHFrameEdgeFixer::operator()(LinkGraph &G) { // Sort eh-frame blocks into address order to ensure we visit CIEs before // their child FDEs. std::vector EHFrameBlocks; - for (auto *B : EHFrame->blocks()) - EHFrameBlocks.push_back(B); + llvm::append_range(EHFrameBlocks, EHFrame->blocks()); llvm::sort(EHFrameBlocks, [](const Block *LHS, const Block *RHS) { return LHS->getAddress() < RHS->getAddress(); }); diff --git a/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp b/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp index 94f696fa20863..c785381175284 100644 --- a/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp @@ -86,8 +86,7 @@ Error COFFVCRuntimeBootstrapper::loadVCRuntime( if (!G) return G.takeError(); - for (auto &Lib : (*G)->getImportedDynamicLibraries()) - ImportedLibraries.push_back(Lib); + llvm::append_range(ImportedLibraries, (*G)->getImportedDynamicLibraries()); JD.addGenerator(std::move(*G)); diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index d6673552e39fd..cbed057950aea 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -127,8 +127,7 @@ void UnsatisfiedSymbolDependencies::log(raw_ostream &OS) const { SymbolsNotFound::SymbolsNotFound(std::shared_ptr SSP, SymbolNameSet Symbols) : SSP(std::move(SSP)) { - for (auto &Sym : Symbols) - this->Symbols.push_back(Sym); + llvm::append_range(this->Symbols, Symbols); assert(!this->Symbols.empty() && "Can not fail to resolve an empty set"); } @@ -2387,8 +2386,8 @@ void ExecutionSession::OL_applyQueryPhase1( 
// Build the definition generator stack for this JITDylib. runSessionLocked([&] { IPLS->CurDefGeneratorStack.reserve(JD.DefGenerators.size()); - for (auto &DG : reverse(JD.DefGenerators)) - IPLS->CurDefGeneratorStack.push_back(DG); + llvm::append_range(IPLS->CurDefGeneratorStack, + reverse(JD.DefGenerators)); }); // Flag that we've done our initialization. diff --git a/llvm/lib/ExecutionEngine/Orc/IRPartitionLayer.cpp b/llvm/lib/ExecutionEngine/Orc/IRPartitionLayer.cpp index 9ad171beac7fe..1a37469c35d2e 100644 --- a/llvm/lib/ExecutionEngine/Orc/IRPartitionLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IRPartitionLayer.cpp @@ -260,8 +260,7 @@ void IRPartitionLayer::emitPartition( { std::vector HashGVs; HashGVs.reserve(GVsToExtract->size()); - for (const auto *GV : *GVsToExtract) - HashGVs.push_back(GV); + llvm::append_range(HashGVs, *GVsToExtract); llvm::sort(HashGVs, [](const GlobalValue *LHS, const GlobalValue *RHS) { return LHS->getName() < RHS->getName(); }); diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 7d385f4cf2fbb..21ebe82c8a71a 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -57,8 +57,7 @@ Function *addHelperAndWrapper(Module &M, StringRef WrapperName, std::vector HelperArgTypes; for (auto *Arg : HelperPrefixArgs) HelperArgTypes.push_back(Arg->getType()); - for (auto *T : WrapperFnType->params()) - HelperArgTypes.push_back(T); + llvm::append_range(HelperArgTypes, WrapperFnType->params()); auto *HelperFnType = FunctionType::get(WrapperFnType->getReturnType(), HelperArgTypes, false); auto *HelperFn = Function::Create(HelperFnType, GlobalValue::ExternalLinkage, @@ -72,8 +71,7 @@ Function *addHelperAndWrapper(Module &M, StringRef WrapperName, IRBuilder<> IB(EntryBlock); std::vector HelperArgs; - for (auto *Arg : HelperPrefixArgs) - HelperArgs.push_back(Arg); + llvm::append_range(HelperArgs, HelperPrefixArgs); for (auto &Arg : WrapperFn->args()) 
HelperArgs.push_back(&Arg); auto *HelperResult = IB.CreateCall(HelperFn, HelperArgs); diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 498d438bc25d4..90194d7fcc119 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -1676,10 +1676,8 @@ Error MachOPlatform::MachOPlatformPlugin::prepareSymbolTableRegistration( // those names. { SmallVector SymsToProcess; - for (auto *Sym : G.defined_symbols()) - SymsToProcess.push_back(Sym); - for (auto *Sym : G.absolute_symbols()) - SymsToProcess.push_back(Sym); + llvm::append_range(SymsToProcess, G.defined_symbols()); + llvm::append_range(SymsToProcess, G.absolute_symbols()); for (auto *Sym : SymsToProcess) { if (!Sym->hasName()) diff --git a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp index 93fe7eeb3ed5b..33734b8253689 100644 --- a/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MapperJITLinkMemoryManager.cpp @@ -43,7 +43,7 @@ class MapperJITLinkMemoryManager::InFlightAlloc } void abandon(OnAbandonedFunction OnFinalize) override { - Parent.Mapper->release({AllocAddr}, std::move(OnFinalize)); + Parent.Mapper->deinitialize({AllocAddr}, std::move(OnFinalize)); } private: diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp index 53d5e049798aa..3d816785cb76d 100644 --- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp @@ -47,8 +47,7 @@ lookupSymbolsAsyncHelper(EPCGenericDylibManager &DylibMgr, return Complete(R.takeError()); Result.push_back({}); Result.back().reserve(R->size()); - for (auto Addr : *R) - Result.back().push_back(Addr); + llvm::append_range(Result.back(), *R); lookupSymbolsAsyncHelper( DylibMgr, Request.drop_front(), std::move(Result), diff --git 
a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp index 10ca5f4d122bc..71b47a04fd131 100644 --- a/llvm/lib/FileCheck/FileCheck.cpp +++ b/llvm/lib/FileCheck/FileCheck.cpp @@ -1640,13 +1640,11 @@ static const char *DefaultCommentPrefixes[] = {"COM", "RUN"}; static void addDefaultPrefixes(FileCheckRequest &Req) { if (Req.CheckPrefixes.empty()) { - for (const char *Prefix : DefaultCheckPrefixes) - Req.CheckPrefixes.push_back(Prefix); + llvm::append_range(Req.CheckPrefixes, DefaultCheckPrefixes); Req.IsDefaultCheckPrefix = true; } if (Req.CommentPrefixes.empty()) - for (const char *Prefix : DefaultCommentPrefixes) - Req.CommentPrefixes.push_back(Prefix); + llvm::append_range(Req.CommentPrefixes, DefaultCommentPrefixes); } struct PrefixMatcher { diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 79547b299a903..5f0a9cdfb941a 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1894,6 +1894,7 @@ struct MDFieldPrinter { void printEmissionKind(StringRef Name, DICompileUnit::DebugEmissionKind EK); void printNameTableKind(StringRef Name, DICompileUnit::DebugNameTableKind NTK); + void printFixedPointKind(StringRef Name, DIFixedPointType::FixedPointKind V); }; } // end anonymous namespace @@ -2030,6 +2031,11 @@ void MDFieldPrinter::printNameTableKind(StringRef Name, Out << FS << Name << ": " << DICompileUnit::nameTableKindString(NTK); } +void MDFieldPrinter::printFixedPointKind(StringRef Name, + DIFixedPointType::FixedPointKind V) { + Out << FS << Name << ": " << DIFixedPointType::fixedPointKindString(V); +} + template void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString, bool ShouldSkipZero) { @@ -2199,6 +2205,29 @@ static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N, Out << ")"; } +static void writeDIFixedPointType(raw_ostream &Out, const DIFixedPointType *N, + AsmWriterContext &) { + Out << "!DIFixedPointType("; + MDFieldPrinter Printer(Out); + if (N->getTag() != 
dwarf::DW_TAG_base_type) + Printer.printTag(N); + Printer.printString("name", N->getName()); + Printer.printInt("size", N->getSizeInBits()); + Printer.printInt("align", N->getAlignInBits()); + Printer.printDwarfEnum("encoding", N->getEncoding(), + dwarf::AttributeEncodingString); + Printer.printDIFlags("flags", N->getFlags()); + Printer.printFixedPointKind("kind", N->getKind()); + if (N->isRational()) { + bool IsUnsigned = !N->isSigned(); + Printer.printAPInt("numerator", N->getNumerator(), IsUnsigned, false); + Printer.printAPInt("denominator", N->getDenominator(), IsUnsigned, false); + } else { + Printer.printInt("factor", N->getFactor()); + } + Out << ")"; +} + static void writeDIStringType(raw_ostream &Out, const DIStringType *N, AsmWriterContext &WriterCtx) { Out << "!DIStringType("; diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index 3c1fd433fb948..d9cc49fdad89c 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -272,6 +272,37 @@ DIBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, 0, Encoding, NumExtraInhabitants, Flags); } +DIFixedPointType * +DIBuilder::createBinaryFixedPointType(StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + DINode::DIFlags Flags, int Factor) { + return DIFixedPointType::get(VMContext, dwarf::DW_TAG_base_type, Name, + SizeInBits, AlignInBits, Encoding, Flags, + DIFixedPointType::FixedPointBinary, Factor, + APInt(), APInt()); +} + +DIFixedPointType * +DIBuilder::createDecimalFixedPointType(StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + DINode::DIFlags Flags, int Factor) { + return DIFixedPointType::get(VMContext, dwarf::DW_TAG_base_type, Name, + SizeInBits, AlignInBits, Encoding, Flags, + DIFixedPointType::FixedPointDecimal, Factor, + APInt(), APInt()); +} + +DIFixedPointType * +DIBuilder::createRationalFixedPointType(StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + 
DINode::DIFlags Flags, APInt Numerator, + APInt Denominator) { + return DIFixedPointType::get(VMContext, dwarf::DW_TAG_base_type, Name, + SizeInBits, AlignInBits, Encoding, Flags, + DIFixedPointType::FixedPointRational, 0, + Numerator, Denominator); +} + DIStringType *DIBuilder::createStringType(StringRef Name, uint64_t SizeInBits) { assert(!Name.empty() && "Unable to create type without name"); return DIStringType::get(VMContext, dwarf::DW_TAG_string_type, Name, diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index ae3d79fc17a59..f8c24d896df32 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -721,15 +721,58 @@ std::optional DIBasicType::getSignedness() const { switch (getEncoding()) { case dwarf::DW_ATE_signed: case dwarf::DW_ATE_signed_char: + case dwarf::DW_ATE_signed_fixed: return Signedness::Signed; case dwarf::DW_ATE_unsigned: case dwarf::DW_ATE_unsigned_char: + case dwarf::DW_ATE_unsigned_fixed: return Signedness::Unsigned; default: return std::nullopt; } } +DIFixedPointType * +DIFixedPointType::getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, + uint64_t SizeInBits, uint32_t AlignInBits, + unsigned Encoding, DIFlags Flags, unsigned Kind, + int Factor, APInt Numerator, APInt Denominator, + StorageType Storage, bool ShouldCreate) { + DEFINE_GETIMPL_LOOKUP(DIFixedPointType, + (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags, + Kind, Factor, Numerator, Denominator)); + Metadata *Ops[] = {nullptr, nullptr, Name}; + DEFINE_GETIMPL_STORE(DIFixedPointType, + (Tag, SizeInBits, AlignInBits, Encoding, Flags, Kind, + Factor, Numerator, Denominator), + Ops); +} + +bool DIFixedPointType::isSigned() const { + return getEncoding() == dwarf::DW_ATE_signed_fixed; +} + +std::optional +DIFixedPointType::getFixedPointKind(StringRef Str) { + return StringSwitch>(Str) + .Case("Binary", FixedPointBinary) + .Case("Decimal", FixedPointDecimal) + .Case("Rational", FixedPointRational) + 
.Default(std::nullopt); +} + +const char *DIFixedPointType::fixedPointKindString(FixedPointKind V) { + switch (V) { + case FixedPointBinary: + return "Binary"; + case FixedPointDecimal: + return "Decimal"; + case FixedPointRational: + return "Rational"; + } + return nullptr; +} + DIStringType *DIStringType::getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *StringLength, Metadata *StringLengthExp, diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index a18cf6f205623..efabe40fab192 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -494,6 +494,43 @@ template <> struct MDNodeKeyImpl { } }; +template <> struct MDNodeKeyImpl { + unsigned Tag; + MDString *Name; + uint64_t SizeInBits; + uint32_t AlignInBits; + unsigned Encoding; + unsigned Flags; + unsigned Kind; + int Factor; + APInt Numerator; + APInt Denominator; + + MDNodeKeyImpl(unsigned Tag, MDString *Name, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, unsigned Flags, + unsigned Kind, int Factor, APInt Numerator, APInt Denominator) + : Tag(Tag), Name(Name), SizeInBits(SizeInBits), AlignInBits(AlignInBits), + Encoding(Encoding), Flags(Flags), Kind(Kind), Factor(Factor), + Numerator(Numerator), Denominator(Denominator) {} + MDNodeKeyImpl(const DIFixedPointType *N) + : Tag(N->getTag()), Name(N->getRawName()), SizeInBits(N->getSizeInBits()), + AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()), + Flags(N->getFlags()), Kind(N->getKind()), Factor(N->getFactorRaw()), + Numerator(N->getNumeratorRaw()), Denominator(N->getDenominatorRaw()) {} + + bool isKeyOf(const DIFixedPointType *RHS) const { + return Name == RHS->getRawName() && SizeInBits == RHS->getSizeInBits() && + AlignInBits == RHS->getAlignInBits() && Kind == RHS->getKind() && + (RHS->isRational() ? 
(Numerator == RHS->getNumerator() && + Denominator == RHS->getDenominator()) + : Factor == RHS->getFactor()); + } + + unsigned getHashValue() const { + return hash_combine(Name, Flags, Kind, Factor, Numerator, Denominator); + } +}; + template <> struct MDNodeKeyImpl { unsigned Tag; MDString *Name; diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ed86a10c3a25f..95dd3aa86b428 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1239,6 +1239,25 @@ void Verifier::visitDIBasicType(const DIBasicType &N) { "invalid tag", &N); } +void Verifier::visitDIFixedPointType(const DIFixedPointType &N) { + visitDIBasicType(N); + + CheckDI(N.getTag() == dwarf::DW_TAG_base_type, "invalid tag", &N); + CheckDI(N.getEncoding() == dwarf::DW_ATE_signed_fixed || + N.getEncoding() == dwarf::DW_ATE_unsigned_fixed, + "invalid encoding", &N); + CheckDI(N.getKind() == DIFixedPointType::FixedPointBinary || + N.getKind() == DIFixedPointType::FixedPointDecimal || + N.getKind() == DIFixedPointType::FixedPointRational, + "invalid kind", &N); + CheckDI(N.getKind() != DIFixedPointType::FixedPointRational || + N.getFactorRaw() == 0, + "factor should be 0 for rationals", &N); + CheckDI(N.getKind() == DIFixedPointType::FixedPointRational || + (N.getNumeratorRaw() == 0 && N.getDenominatorRaw() == 0), + "numerator and denominator should be 0 for non-rationals", &N); +} + void Verifier::visitDIStringType(const DIStringType &N) { CheckDI(N.getTag() == dwarf::DW_TAG_string_type, "invalid tag", &N); CheckDI(!(N.isBigEndian() && N.isLittleEndian()), "has conflicting flags", diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index dd45a94ea892a..fa5c3dab1f115 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -37,10 +37,22 @@ STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); } // end namespace stats } // end anonymous namespace +static int getPrecedence(MCBinaryExpr::Opcode Op) { + switch (Op) { + case MCBinaryExpr::Add: + case 
MCBinaryExpr::Sub: + return 1; + default: + return 0; + } +} + // VariantKind printing and formatting utilize MAI. operator<< (dump and some // target code) specifies MAI as nullptr and should be avoided when MAI is // needed. -void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { +void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, + int SurroundingPrec) const { + constexpr int MaxPrec = 9; switch (getKind()) { case MCExpr::Target: return cast(this)->printImpl(OS, MAI); @@ -75,17 +87,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SRE = cast(*this); const MCSymbol &Sym = SRE.getSymbol(); - // Parenthesize names that start with $ so that they don't look like - // absolute names. - bool UseParens = MAI && MAI->useParensForDollarSignNames() && !InParens && - Sym.getName().starts_with('$'); - - if (UseParens) { - OS << '('; - Sym.print(OS, MAI); - OS << ')'; - } else - Sym.print(OS, MAI); + Sym.print(OS, MAI); const MCSymbolRefExpr::VariantKind Kind = SRE.getKind(); if (Kind != MCSymbolRefExpr::VK_None) { @@ -108,24 +110,26 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { case MCUnaryExpr::Not: OS << '~'; break; case MCUnaryExpr::Plus: OS << '+'; break; } - bool Binary = UE.getSubExpr()->getKind() == MCExpr::Binary; - if (Binary) OS << "("; - UE.getSubExpr()->print(OS, MAI); - if (Binary) OS << ")"; + UE.getSubExpr()->print(OS, MAI, MaxPrec); return; } case MCExpr::Binary: { const MCBinaryExpr &BE = cast(*this); - - // Only print parens around the LHS if it is non-trivial. - if (isa(BE.getLHS()) || isa(BE.getLHS())) { - BE.getLHS()->print(OS, MAI); - } else { + // We want to avoid redundant parentheses for relocatable expressions like + // a-b+c. 
+ // + // Print '(' if the current operator has lower precedence than the + // surrounding operator, or if the surrounding operator's precedence is + // unknown (set to HighPrecedence). + int Prec = getPrecedence(BE.getOpcode()); + bool Paren = Prec < SurroundingPrec; + if (Paren) OS << '('; - BE.getLHS()->print(OS, MAI); - OS << ')'; - } + // Many operators' precedence is different from C. Set the precedence to + // HighPrecedence for unknown operators. + int SubPrec = Prec ? Prec : MaxPrec; + BE.getLHS()->print(OS, MAI, SubPrec); switch (BE.getOpcode()) { case MCBinaryExpr::Add: @@ -133,6 +137,8 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { if (const MCConstantExpr *RHSC = dyn_cast(BE.getRHS())) { if (RHSC->getValue() < 0) { OS << RHSC->getValue(); + if (Paren) + OS << ')'; return; } } @@ -160,14 +166,9 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const { case MCBinaryExpr::Xor: OS << '^'; break; } - // Only print parens around the LHS if it is non-trivial. 
- if (isa(BE.getRHS()) || isa(BE.getRHS())) { - BE.getRHS()->print(OS, MAI); - } else { - OS << '('; - BE.getRHS()->print(OS, MAI); + BE.getRHS()->print(OS, MAI, SubPrec + 1); + if (Paren) OS << ')'; - } return; } } diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 11e122bcaac23..c94ddfa087fd3 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -401,10 +401,10 @@ bool ELFAsmParser::maybeParseSectionType(StringRef &TypeName) { Lex(); if (L.isNot(AsmToken::At) && L.isNot(AsmToken::Percent) && L.isNot(AsmToken::String)) { - if (L.getAllowAtInIdentifier()) - return TokError("expected '@', '%' or \"\""); - else + if (getContext().getAsmInfo()->getCommentString().starts_with('@')) return TokError("expected '%' or \"\""); + else + return TokError("expected '@', '%' or \"\""); } if (!L.is(AsmToken::String)) Lex(); diff --git a/llvm/lib/ObjCopy/COFF/COFFReader.cpp b/llvm/lib/ObjCopy/COFF/COFFReader.cpp index 32aceb805a2a0..62a71d41ded5f 100644 --- a/llvm/lib/ObjCopy/COFF/COFFReader.cpp +++ b/llvm/lib/ObjCopy/COFF/COFFReader.cpp @@ -70,8 +70,7 @@ Error COFFReader::readSections(Object &Obj) const { return E; S.setContentsRef(Contents); ArrayRef Relocs = COFFObj.getRelocations(Sec); - for (const coff_relocation &R : Relocs) - S.Relocs.push_back(R); + llvm::append_range(S.Relocs, Relocs); if (Expected NameOrErr = COFFObj.getSectionName(Sec)) S.Name = *NameOrErr; else diff --git a/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp index 8ad3021a03428..e6018ebfbec21 100644 --- a/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp +++ b/llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp @@ -38,8 +38,7 @@ Error XCOFFReader::readSections(Object &Obj) const { XCOFFObj.relocations(Sec); if (!Relocations) return Relocations.takeError(); - for (const XCOFFRelocation32 &Rel : Relocations.get()) - ReadSec.Relocations.push_back(Rel); + llvm::append_range(ReadSec.Relocations, Relocations.get()); 
} Obj.Sections.push_back(std::move(ReadSec)); diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp index 527265410809c..381330b98f711 100644 --- a/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp +++ b/llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -735,9 +735,7 @@ Expected> YAMLCoffSymbolRVASubsection::fromCodeViewSubsection( const DebugSymbolRVASubsectionRef &Section) { auto Result = std::make_shared(); - for (const auto &RVA : Section) { - Result->RVAs.push_back(RVA); - } + llvm::append_range(Result->RVAs, Section); return Result; } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 1b37e4a4fe1a3..8646c1f49ac35 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -681,6 +681,12 @@ Expected parseHardwareLoopOptions(StringRef Params) { return HardwareLoopOpts; } +/// Parser of parameters for Lint pass. +Expected parseLintOptions(StringRef Params) { + return PassBuilder::parseSinglePassOption(Params, "abort-on-error", + "LintPass"); +} + /// Parser of parameters for LoopUnroll pass. 
Expected parseLoopUnrollOptions(StringRef Params) { LoopUnrollOptions UnrollOpts; diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 49135c5e1a658..a43be480d6194 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -397,7 +397,6 @@ FUNCTION_PASS("kcfi", KCFIPass()) FUNCTION_PASS("kernel-info", KernelInfoPrinter(TM)) FUNCTION_PASS("lcssa", LCSSAPass()) FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) -FUNCTION_PASS("lint", LintPass()) FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) @@ -543,6 +542,11 @@ FUNCTION_PASS_WITH_PARAMS( parseInstCombineOptions, "no-use-loop-info;use-loop-info;no-verify-fixpoint;verify-fixpoint;" "max-iterations=N") +FUNCTION_PASS_WITH_PARAMS( + "lint", "LintPass", + [](bool AbortOnError) { return LintPass(AbortOnError); }, + parseLintOptions, + "abort-on-error") FUNCTION_PASS_WITH_PARAMS( "loop-unroll", "LoopUnrollPass", [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); }, diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index e34a770b1b53e..f1dd39ce133a8 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -2435,8 +2435,8 @@ class CategorizedHelpPrinter : public HelpPrinter { // Collect registered option categories into vector in preparation for // sorting. - for (OptionCategory *Category : GlobalParser->RegisteredOptionCategories) - SortedCategories.push_back(Category); + llvm::append_range(SortedCategories, + GlobalParser->RegisteredOptionCategories); // Sort the different option categories alphabetically. 
assert(SortedCategories.size() > 0 && "No option categories registered!"); diff --git a/llvm/lib/Support/Debug.cpp b/llvm/lib/Support/Debug.cpp index 98a9ac4722b50..5bb04d0c22998 100644 --- a/llvm/lib/Support/Debug.cpp +++ b/llvm/lib/Support/Debug.cpp @@ -73,8 +73,7 @@ void setCurrentDebugType(const char *Type) { void setCurrentDebugTypes(const char **Types, unsigned Count) { CurrentDebugType->clear(); - for (size_t T = 0; T < Count; ++T) - CurrentDebugType->push_back(Types[T]); + llvm::append_range(*CurrentDebugType, ArrayRef(Types, Count)); } } // namespace llvm diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp index 5abcead5037f4..b2e606c86dd57 100644 --- a/llvm/lib/Support/SuffixTree.cpp +++ b/llvm/lib/Support/SuffixTree.cpp @@ -348,8 +348,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() { // Yes. Update the state to reflect this, and then bail out. N = Curr; RS.Length = Length; - for (unsigned StartIdx : RepeatedSubstringStarts) - RS.StartIndices.push_back(StartIdx); + llvm::append_range(RS.StartIndices, RepeatedSubstringStarts); break; } // At this point, either NewRS is an empty RepeatedSubstring, or it was diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index d182b647aa931..0fdf78976b691 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -1015,8 +1015,7 @@ const Init *UnOpInit::Fold(const Record *CurRec, bool IsFinal) const { const auto *InnerList = dyn_cast(InnerInit); if (!InnerList) return std::nullopt; - for (const Init *InnerElem : InnerList->getValues()) - Flattened.push_back(InnerElem); + llvm::append_range(Flattened, InnerList->getValues()); }; return Flattened; }; diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 066d62b3d4b4b..9553a44fb317e 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ 
-627,10 +627,10 @@ Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) { Function::Create(Arm64Ty, GlobalValue::WeakODRLinkage, 0, ThunkName, M); GuestExit->setComdat(M->getOrInsertComdat(ThunkName)); GuestExit->setSection(".wowthk$aa"); - GuestExit->setMetadata( + GuestExit->addMetadata( "arm64ec_unmangled_name", - MDNode::get(M->getContext(), - MDString::get(M->getContext(), F->getName()))); + *MDNode::get(M->getContext(), + MDString::get(M->getContext(), F->getName()))); GuestExit->setMetadata( "arm64ec_ecmangled_name", MDNode::get(M->getContext(), @@ -803,6 +803,23 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { DispatchFnGlobal = M->getOrInsertGlobal("__os_arm64x_dispatch_call", DispatchFnPtrType); + // Mangle names of function aliases and add the alias name to + // arm64ec_unmangled_name metadata to ensure a weak anti-dependency symbol is + // emitted for the alias as well. Do this early, before handling + // hybrid_patchable functions, to avoid mangling their aliases. + for (GlobalAlias &A : Mod.aliases()) { + auto F = dyn_cast_or_null(A.getAliaseeObject()); + if (!F) + continue; + if (std::optional MangledName = + getArm64ECMangledFunctionName(A.getName().str())) { + F->addMetadata("arm64ec_unmangled_name", + *MDNode::get(M->getContext(), + MDString::get(M->getContext(), A.getName()))); + A.setName(MangledName.value()); + } + } + DenseMap FnsMap; SetVector PatchableFns; @@ -837,20 +854,24 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { // emitGlobalAlias to emit the right alias. 
auto *A = GlobalAlias::create(GlobalValue::LinkOnceODRLinkage, OrigName, &F); + auto *AM = GlobalAlias::create(GlobalValue::LinkOnceODRLinkage, + MangledName.value(), &F); + F.replaceUsesWithIf(AM, + [](Use &U) { return isa(U.getUser()); }); F.replaceAllUsesWith(A); F.setMetadata("arm64ec_exp_name", MDNode::get(M->getContext(), MDString::get(M->getContext(), "EXP+" + MangledName.value()))); A->setAliasee(&F); + AM->setAliasee(&F); if (F.hasDLLExportStorageClass()) { A->setDLLStorageClass(GlobalValue::DLLExportStorageClass); F.setDLLStorageClass(GlobalValue::DefaultStorageClass); } - FnsMap[A] = GlobalAlias::create(GlobalValue::LinkOnceODRLinkage, - MangledName.value(), &F); + FnsMap[A] = AM; PatchableFns.insert(A); } } @@ -928,9 +949,9 @@ bool AArch64Arm64ECCallLowering::processFunction( if (!F.hasLocalLinkage() || F.hasAddressTaken()) { if (std::optional MangledName = getArm64ECMangledFunctionName(F.getName().str())) { - F.setMetadata("arm64ec_unmangled_name", - MDNode::get(M->getContext(), - MDString::get(M->getContext(), F.getName()))); + F.addMetadata("arm64ec_unmangled_name", + *MDNode::get(M->getContext(), + MDString::get(M->getContext(), F.getName()))); if (F.hasComdat() && F.getComdat()->getName() == F.getName()) { Comdat *MangledComdat = M->getOrInsertComdat(MangledName.value()); SmallVector ComdatUsers = diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index ff1aee9bda6e5..db0652bc5949c 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -45,6 +45,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -226,6 +227,12 @@ class AArch64AsmPrinter : public AsmPrinter { } bool runOnMachineFunction(MachineFunction &MF) override { + if (auto *PSIW = 
getAnalysisIfAvailable()) + PSI = &PSIW->getPSI(); + if (auto *SDPIW = + getAnalysisIfAvailable()) + SDPI = &SDPIW->getStaticDataProfileInfo(); + AArch64FI = MF.getInfo(); STI = &MF.getSubtarget(); @@ -1381,22 +1388,21 @@ void AArch64AsmPrinter::emitFunctionEntryLabel() { return Sym; }; - if (MCSymbol *UnmangledSym = - getSymbolFromMetadata("arm64ec_unmangled_name")) { - MCSymbol *ECMangledSym = getSymbolFromMetadata("arm64ec_ecmangled_name"); - - if (ECMangledSym) { - // An external function, emit the alias from the unmangled symbol to - // mangled symbol name and the alias from the mangled symbol to guest - // exit thunk. + SmallVector UnmangledNames; + MF->getFunction().getMetadata("arm64ec_unmangled_name", UnmangledNames); + for (MDNode *Node : UnmangledNames) { + StringRef NameStr = cast(Node->getOperand(0))->getString(); + MCSymbol *UnmangledSym = MMI->getContext().getOrCreateSymbol(NameStr); + if (std::optional MangledName = + getArm64ECMangledFunctionName(UnmangledSym->getName())) { + MCSymbol *ECMangledSym = + MMI->getContext().getOrCreateSymbol(*MangledName); emitFunctionAlias(UnmangledSym, ECMangledSym); - emitFunctionAlias(ECMangledSym, CurrentFnSym); - } else { - // A function implementation, emit the alias from the unmangled symbol - // to mangled symbol name. 
- emitFunctionAlias(UnmangledSym, CurrentFnSym); } } + if (MCSymbol *ECMangledSym = + getSymbolFromMetadata("arm64ec_ecmangled_name")) + emitFunctionAlias(ECMangledSym, CurrentFnSym); } } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 1761f58faf0fe..6bf6ce7167833 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -5552,12 +5552,9 @@ void AArch64FrameLowering::emitRemarks( return; llvm::sort(StackAccesses); - StackAccesses.erase(llvm::remove_if(StackAccesses, - [](const StackAccess &S) { - return S.AccessTypes == - StackAccess::NotAccessed; - }), - StackAccesses.end()); + llvm::erase_if(StackAccesses, [](const StackAccess &S) { + return S.AccessTypes == StackAccess::NotAccessed; + }); SmallVector MixedObjects; SmallVector> HazardPairs; diff --git a/llvm/lib/Target/AArch64/AArch64PfmCounters.td b/llvm/lib/Target/AArch64/AArch64PfmCounters.td index b1d1664e3f1b1..c7132b40ca2fe 100644 --- a/llvm/lib/Target/AArch64/AArch64PfmCounters.td +++ b/llvm/lib/Target/AArch64/AArch64PfmCounters.td @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -def CpuCyclesPfmCounter : PfmCounter<"CPU_CYCLES">; +def CpuCyclesPfmCounter : PfmCounter<"CYCLES">; def DefaultPfmCounters : ProcPfmCounters { let CycleCounter = CpuCyclesPfmCounter; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 6fa97d82a668b..3d6b974d1f027 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -17,6 +17,7 @@ #include "AMDGPUAsmPrinter.h" #include "AMDGPUMachineFunction.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCExpr.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" @@ -43,24 +44,24 @@ 
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AsmPrinter &ap): Ctx(ctx), ST(st), AP(ap) { } -static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) { +static AMDGPUMCExpr::Specifier getSpecifier(unsigned MOFlags) { switch (MOFlags) { default: - return MCSymbolRefExpr::VK_None; + return AMDGPUMCExpr::S_None; case SIInstrInfo::MO_GOTPCREL: - return MCSymbolRefExpr::VK_GOTPCREL; + return AMDGPUMCExpr::S_GOTPCREL; case SIInstrInfo::MO_GOTPCREL32_LO: - return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_LO; + return AMDGPUMCExpr::S_GOTPCREL32_LO; case SIInstrInfo::MO_GOTPCREL32_HI: - return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_HI; + return AMDGPUMCExpr::S_GOTPCREL32_HI; case SIInstrInfo::MO_REL32_LO: - return MCSymbolRefExpr::VK_AMDGPU_REL32_LO; + return AMDGPUMCExpr::S_REL32_LO; case SIInstrInfo::MO_REL32_HI: - return MCSymbolRefExpr::VK_AMDGPU_REL32_HI; + return AMDGPUMCExpr::S_REL32_HI; case SIInstrInfo::MO_ABS32_LO: - return MCSymbolRefExpr::VK_AMDGPU_ABS32_LO; + return AMDGPUMCExpr::S_ABS32_LO; case SIInstrInfo::MO_ABS32_HI: - return MCSymbolRefExpr::VK_AMDGPU_ABS32_HI; + return AMDGPUMCExpr::S_ABS32_HI; } } @@ -85,7 +86,7 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO, AP.getNameWithPrefix(SymbolName, GV); MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName); const MCExpr *Expr = - MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx); + MCSymbolRefExpr::create(Sym, getSpecifier(MO.getTargetFlags()), Ctx); int64_t Offset = MO.getOffset(); if (Offset != 0) { Expr = MCBinaryExpr::createAdd(Expr, diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index b22babb4a00d8..5439ea2f59111 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -215,6 +215,11 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, bool HasVOP3DPP = ST->hasVOP3DPP(); auto OrigOp = OrigMI.getOpcode(); + if (ST->useRealTrue16Insts() && 
AMDGPU::isTrue16Inst(OrigOp)) { + LLVM_DEBUG( + dbgs() << " failed: Did not expect any 16-bit uses of dpp values\n"); + return nullptr; + } auto DPPOp = getDPPOp(OrigOp, IsShrinkable); if (DPPOp == -1) { LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n"); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 2d960a32339f4..50531af627e4a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -8,6 +8,7 @@ #include "AMDGPUFixupKinds.h" #include "AMDGPUMCTargetDesc.h" +#include "MCTargetDesc/AMDGPUMCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCValue.h" @@ -45,24 +46,24 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AMDGPU_ABS32_LO; } - switch (Target.getAccessVariant()) { + switch (AMDGPUMCExpr::Specifier(Target.getAccessVariant())) { default: break; - case MCSymbolRefExpr::VK_GOTPCREL: + case AMDGPUMCExpr::S_GOTPCREL: return ELF::R_AMDGPU_GOTPCREL; - case MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_LO: + case AMDGPUMCExpr::S_GOTPCREL32_LO: return ELF::R_AMDGPU_GOTPCREL32_LO; - case MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_HI: + case AMDGPUMCExpr::S_GOTPCREL32_HI: return ELF::R_AMDGPU_GOTPCREL32_HI; - case MCSymbolRefExpr::VK_AMDGPU_REL32_LO: + case AMDGPUMCExpr::S_REL32_LO: return ELF::R_AMDGPU_REL32_LO; - case MCSymbolRefExpr::VK_AMDGPU_REL32_HI: + case AMDGPUMCExpr::S_REL32_HI: return ELF::R_AMDGPU_REL32_HI; - case MCSymbolRefExpr::VK_AMDGPU_REL64: + case AMDGPUMCExpr::S_REL64: return ELF::R_AMDGPU_REL64; - case MCSymbolRefExpr::VK_AMDGPU_ABS32_LO: + case AMDGPUMCExpr::S_ABS32_LO: return ELF::R_AMDGPU_ABS32_LO; - case MCSymbolRefExpr::VK_AMDGPU_ABS32_HI: + case AMDGPUMCExpr::S_ABS32_HI: return ELF::R_AMDGPU_ABS32_HI; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 56c53ed587e9f..6f1d89e500ed3 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCAsmInfo.h" +#include "MCTargetDesc/AMDGPUMCExpr.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -16,14 +17,14 @@ using namespace llvm; const MCAsmInfo::VariantKindDesc variantKindDescs[] = { - {MCSymbolRefExpr::VK_GOTPCREL, "gotpcrel"}, - {MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_LO, "gotpcrel32@lo"}, - {MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_HI, "gotpcrel32@hi"}, - {MCSymbolRefExpr::VK_AMDGPU_REL32_LO, "rel32@lo"}, - {MCSymbolRefExpr::VK_AMDGPU_REL32_HI, "rel32@hi"}, - {MCSymbolRefExpr::VK_AMDGPU_REL64, "rel64"}, - {MCSymbolRefExpr::VK_AMDGPU_ABS32_LO, "abs32@lo"}, - {MCSymbolRefExpr::VK_AMDGPU_ABS32_HI, "abs32@hi"}, + {AMDGPUMCExpr::S_GOTPCREL, "gotpcrel"}, + {AMDGPUMCExpr::S_GOTPCREL32_LO, "gotpcrel32@lo"}, + {AMDGPUMCExpr::S_GOTPCREL32_HI, "gotpcrel32@hi"}, + {AMDGPUMCExpr::S_REL32_LO, "rel32@lo"}, + {AMDGPUMCExpr::S_REL32_HI, "rel32@hi"}, + {AMDGPUMCExpr::S_REL64, "rel64"}, + {AMDGPUMCExpr::S_ABS32_LO, "abs32@lo"}, + {AMDGPUMCExpr::S_ABS32_HI, "abs32@hi"}, }; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp index 1391ef6dd09e5..1e82ee36dc0eb 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AMDGPUFixupKinds.h" +#include "MCTargetDesc/AMDGPUMCExpr.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" 
#include "Utils/AMDGPUBaseInfo.h" @@ -546,9 +547,8 @@ static bool needsPCRel(const MCExpr *Expr) { switch (Expr->getKind()) { case MCExpr::SymbolRef: { auto *SE = cast(Expr); - MCSymbolRefExpr::VariantKind Kind = SE->getKind(); - return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO && - Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI; + auto Spec = AMDGPU::getSpecifier(SE); + return Spec != AMDGPUMCExpr::S_ABS32_LO && Spec != AMDGPUMCExpr::S_ABS32_HI; } case MCExpr::Binary: { auto *BE = cast(Expr); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp index 7fff2e515b046..678a7be1f2456 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp @@ -77,7 +77,7 @@ void AMDGPUMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { break; } for (const auto *It = Args.begin(); It != Args.end(); ++It) { - (*It)->print(OS, MAI, /*InParens=*/false); + (*It)->print(OS, MAI); if ((It + 1) != Args.end()) OS << ", "; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h index c0167096f022a..f38320ae79858 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h @@ -39,6 +39,19 @@ class AMDGPUMCExpr : public MCTargetExpr { AGVK_Occupancy }; + // Relocation specifiers. 
+ enum Specifier { + S_None, + S_GOTPCREL, // symbol@gotpcrel + S_GOTPCREL32_LO, // symbol@gotpcrel32@lo + S_GOTPCREL32_HI, // symbol@gotpcrel32@hi + S_REL32_LO, // symbol@rel32@lo + S_REL32_HI, // symbol@rel32@hi + S_REL64, // symbol@rel64 + S_ABS32_LO, // symbol@abs32@lo + S_ABS32_HI, // symbol@abs32@hi + }; + private: VariantKind Kind; MCContext &Ctx; @@ -113,6 +126,9 @@ void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCExpr *foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx); +static inline AMDGPUMCExpr::Specifier getSpecifier(const MCSymbolRefExpr *SRE) { + return AMDGPUMCExpr::Specifier(SRE->getKind()); +} } // end namespace AMDGPU } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index dbc4c37a77a88..a6c97a02cb959 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -1005,8 +1005,8 @@ void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor( // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64. 
Streamer.emitValue( MCBinaryExpr::createSub( - MCSymbolRefExpr::create(KernelCodeSymbol, - MCSymbolRefExpr::VK_AMDGPU_REL64, Context), + MCSymbolRefExpr::create(KernelCodeSymbol, AMDGPUMCExpr::S_REL64, + Context), MCSymbolRefExpr::create(KernelDescriptorSymbol, Context), Context), sizeof(amdhsa::kernel_descriptor_t::kernel_code_entry_byte_offset)); for (uint32_t i = 0; i < sizeof(amdhsa::kernel_descriptor_t::reserved1); ++i) diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 8195c93d847b0..0d5287443c490 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/AMDGPUMCExpr.h" #include "MCTargetDesc/R600MCTargetDesc.h" #include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index d2f9ec982ae01..a4713311e2b3e 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -19696,7 +19696,7 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { if (Subtarget->isThumb2()) return ARM_AM::getT2SOImmVal(AbsImm) != -1; // Thumb1 only has 8-bit unsigned immediate. 
- return AbsImm >= 0 && AbsImm <= 255; + return AbsImm <= 255; } // Return false to prevent folding diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp index dc7e887167d30..d743f00da273b 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp @@ -138,7 +138,7 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } assert(Op.isExpr() && "unknown operand kind in printOperand"); - Op.getExpr()->print(O, &MAI, true); + Op.getExpr()->print(O, &MAI); } void MipsInstPrinter::printJumpOperand(const MCInst *MI, unsigned OpNo, diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index d5eca7b65b2b1..39dc329d80222 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -45,7 +45,7 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { case MEK_DTPREL: // MEK_DTPREL is used for marking TLS DIEExpr only // and contains a regular sub-expression. - getSubExpr()->print(OS, MAI, true); + getSubExpr()->print(OS, MAI); return; case MEK_CALL_HI16: OS << "%call_hi"; @@ -125,7 +125,7 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { if (Expr->evaluateAsAbsolute(AbsVal)) OS << AbsVal; else - Expr->print(OS, MAI, true); + Expr->print(OS, MAI); OS << ')'; } diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index b453024ba3725..614b3214fd275 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -58,10 +58,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple, UseIntegratedAssembler = false; - // Avoid using parens for identifiers starting with $ - ptxas does - // not expect them. 
- UseParensForDollarSignNames = false; - // ptxas does not support DWARF `.file fileno directory filename' // syntax as of v11.X. EnableDwarfFileDirectoryDefault = false; diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index c767e1e60f17f..63d0777e4ff52 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -555,50 +555,55 @@ struct RISCVOperand final : public MCParsedAsmOperand { bool isBareSymbol() const { int64_t Imm; - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm)) return false; + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; return RISCVAsmParser::classifySymbolRef(getImm(), VK) && VK == RISCVMCExpr::VK_None; } bool isCallSymbol() const { int64_t Imm; - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm)) return false; + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; return RISCVAsmParser::classifySymbolRef(getImm(), VK) && (VK == RISCVMCExpr::VK_CALL || VK == RISCVMCExpr::VK_CALL_PLT); } bool isPseudoJumpSymbol() const { int64_t Imm; - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm)) return false; + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; return RISCVAsmParser::classifySymbolRef(getImm(), VK) && VK == RISCVMCExpr::VK_CALL; } bool isTPRelAddSymbol() const { int64_t Imm; - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; // Must be of 'immediate' type but not a constant. 
if (!isImm() || evaluateConstantImm(getImm(), Imm)) return false; + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; return RISCVAsmParser::classifySymbolRef(getImm(), VK) && VK == RISCVMCExpr::VK_TPREL_ADD; } bool isTLSDESCCallSymbol() const { int64_t Imm; - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; // Must be of 'immediate' type but not a constant. if (!isImm() || evaluateConstantImm(getImm(), Imm)) return false; + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; return RISCVAsmParser::classifySymbolRef(getImm(), VK) && VK == RISCVMCExpr::VK_TLSDESC_CALL; } @@ -838,19 +843,17 @@ struct RISCVOperand final : public MCParsedAsmOperand { } bool isSImm12() const { - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; - int64_t Imm; - bool IsValid; if (!isImm()) return false; - bool IsConstantImm = evaluateConstantImm(getImm(), Imm); - if (!IsConstantImm) - IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK); - else - IsValid = isInt<12>(fixImmediateForRV32(Imm, isRV64Imm())); - return IsValid && - (IsConstantImm || VK == RISCVMCExpr::VK_LO || - VK == RISCVMCExpr::VK_PCREL_LO || VK == RISCVMCExpr::VK_TPREL_LO || + + int64_t Imm; + if (evaluateConstantImm(getImm(), Imm)) + return isInt<12>(fixImmediateForRV32(Imm, isRV64Imm())); + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; + return RISCVAsmParser::classifySymbolRef(getImm(), VK) && + (VK == RISCVMCExpr::VK_LO || VK == RISCVMCExpr::VK_PCREL_LO || + VK == RISCVMCExpr::VK_TPREL_LO || VK == RISCVMCExpr::VK_TLSDESC_LOAD_LO || VK == RISCVMCExpr::VK_TLSDESC_ADD_LO); } @@ -873,26 +876,27 @@ struct RISCVOperand final : public MCParsedAsmOperand { } bool isUImm20LUI() const { - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; - int64_t Imm; if (!isImm()) return false; - bool IsConstantImm = evaluateConstantImm(getImm(), Imm); - if (IsConstantImm) + + int64_t Imm; + if (evaluateConstantImm(getImm(), Imm)) return isUInt<20>(Imm); + + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; return 
RISCVAsmParser::classifySymbolRef(getImm(), VK) && (VK == RISCVMCExpr::VK_HI || VK == RISCVMCExpr::VK_TPREL_HI); } bool isUImm20AUIPC() const { - RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; - int64_t Imm; if (!isImm()) return false; - bool IsConstantImm = evaluateConstantImm(getImm(), Imm); - if (IsConstantImm) + + int64_t Imm; + if (evaluateConstantImm(getImm(), Imm)) return isUInt<20>(Imm); + RISCVMCExpr::Specifier VK = RISCVMCExpr::VK_None; return RISCVAsmParser::classifySymbolRef(getImm(), VK) && (VK == RISCVMCExpr::VK_PCREL_HI || VK == RISCVMCExpr::VK_GOT_HI || VK == RISCVMCExpr::VK_TLS_GOT_HI || @@ -2104,8 +2108,7 @@ ParseStatus RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) { if (getParser().parseExpression(Res, E)) return ParseStatus::Failure; - if (Res->getKind() != MCExpr::ExprKind::SymbolRef || - getSpecifier(cast(Res)) == RISCVMCExpr::VK_PLTPCREL) + if (Res->getKind() != MCExpr::ExprKind::SymbolRef) return Error(S, "operand must be a valid jump target"); Res = RISCVMCExpr::create(Res, RISCVMCExpr::VK_CALL, getContext()); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index b5c66cc1e83f5..37cd79e890263 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -560,12 +560,11 @@ bool RISCVAsmBackend::evaluateTargetFixup(const MCAssembler &Asm, } } - if (!AUIPCTarget.getSymA() || AUIPCTarget.getSubSym()) + if (!AUIPCTarget.getSymA()) return false; - const MCSymbolRefExpr *A = AUIPCTarget.getSymA(); - const MCSymbolELF &SA = cast(A->getSymbol()); - if (getSpecifier(A) != RISCVMCExpr::VK_None || SA.isUndefined()) + const MCSymbolELF &SA = cast(*AUIPCTarget.getAddSym()); + if (SA.isUndefined()) return false; bool IsResolved = &SA.getSection() == AUIPCDF->getParent() && diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index b27f13e6b95ba..69ad3d936fbbe 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -557,10 +557,7 @@ uint64_t RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, FixupKind = RISCV::fixup_riscv_tlsdesc_call; break; } - } else if ((Kind == MCExpr::SymbolRef && - getSpecifier(cast(Expr)) == - RISCVMCExpr::VK_None) || - Kind == MCExpr::Binary) { + } else if (Kind == MCExpr::SymbolRef || Kind == MCExpr::Binary) { // FIXME: Sub kind binary exprs have chance of underflow. if (MIFrm == RISCVII::InstFormatJ) { FixupKind = RISCV::fixup_riscv_jal; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h index 604d2ebc66d1c..fd6993c18d820 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h @@ -49,8 +49,6 @@ class RISCVMCExpr : public MCTargetExpr { const MCExpr *Expr; const Specifier specifier; - int64_t evaluateAsInt64(int64_t Value) const; - explicit RISCVMCExpr(const MCExpr *Expr, Specifier S) : Expr(Expr), specifier(S) {} @@ -77,8 +75,6 @@ class RISCVMCExpr : public MCTargetExpr { return getSubExpr()->findAssociatedFragment(); } - bool evaluateAsConstant(int64_t &Res) const; - static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } @@ -86,10 +82,6 @@ class RISCVMCExpr : public MCTargetExpr { static std::optional getSpecifierForName(StringRef name); static StringRef getSpecifierName(Specifier Kind); }; - -static inline RISCVMCExpr::Specifier getSpecifier(const MCSymbolRefExpr *SRE) { - return RISCVMCExpr::Specifier(SRE->getKind()); -} } // end namespace llvm. 
#endif diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp index 5be5345cca73a..b5cb05f30fb26 100644 --- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp +++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp @@ -113,9 +113,10 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { // vfredosum.vs v8, v8, v10 // vfmv.f.s fa0, v8 // -// This mainly affects ordered fadd reductions, since other types of reduction -// typically use element-wise vectorisation in the loop body. This tries to -// vectorize any scalar phis that feed into a fadd reduction: +// This mainly affects ordered fadd reductions and VP reductions that have a +// scalar start value, since other types of reduction typically use element-wise +// vectorisation in the loop body. This tries to vectorize any scalar phis that +// feed into these reductions: // // loop: // %phi = phi [ ..., %entry ], [ %acc, %loop ] @@ -137,7 +138,8 @@ bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) { if (expandVPStrideLoad(I)) return true; - if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd) + if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd && + !isa(&I)) return false; auto *PHI = dyn_cast(I.getOperand(0)); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 70ec57798db71..0dc62ef04ec0f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" @@ -79,6 +80,12 @@ static cl::opt "use for creating a floating-point immediate value"), cl::init(2)); +static cl::opt + 
ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, + cl::desc("Swap add and addi in cases where the add may " + "be combined with a shift"), + cl::init(true)); + RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -14441,6 +14448,67 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT)); } +// Check if this SDValue is an add immediate that is fed by a shift of 1, 2, +// or 3. +static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, + SelectionDAG &DAG) { + using namespace llvm::SDPatternMatch; + + // Looking for a reg-reg add and not an addi. + if (isa(N->getOperand(1))) + return SDValue(); + + // Based on testing it seems that performance degrades if the ADDI has + // more than 2 uses. + if (AddI->use_size() > 2) + return SDValue(); + + APInt AddVal; + SDValue SHLVal; + if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal)))) + return SDValue(); + + APInt VShift; + if (!sd_match(SHLVal, m_BinOp(ISD::SHL, m_Value(), m_ConstInt(VShift)))) + return SDValue(); + + if (VShift.slt(1) || VShift.sgt(3)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + // The shift must be positive but the add can be signed. + uint64_t ShlConst = VShift.getZExtValue(); + int64_t AddConst = AddVal.getSExtValue(); + + SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0), + DAG.getConstant(ShlConst, DL, VT), Other); + return DAG.getNode(ISD::ADD, DL, VT, SHADD, + DAG.getSignedConstant(AddConst, DL, VT)); +} + +// Optimize (add (add (shl x, c0), c1), y) -> +// (ADDI (SH*ADD y, x), c1), if c0 equals to [1|2|3]. +static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + // Perform this optimization only in the zba extension. 
+ if (!ReassocShlAddiAdd || !Subtarget.hasStdExtZba()) + return SDValue(); + + // Skip for vector types and larger types. + EVT VT = N->getValueType(0); + if (VT != Subtarget.getXLenVT()) + return SDValue(); + + SDValue AddI = N->getOperand(0); + SDValue Other = N->getOperand(1); + if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG)) + return V; + if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG)) + return V; + return SDValue(); +} + // Combine a constant select operand into its use: // // (and (select cond, -1, c), x) @@ -14682,9 +14750,12 @@ static SDValue performADDCombine(SDNode *N, return V; if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) return V; - if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) + if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) { if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) return V; + if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget)) + return V; + } if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget)) return V; if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index 16cc0e5a61f0b..32f533b8f1146 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -98,17 +98,17 @@ def PseudoCCADDI : Pseudo<(outs GPR:$dst), ReadSFBALU]>; def PseudoCCSLLI : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLI : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAI : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, 
GPR:$rs1, simm12:$rs2), []>, + GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCANDI : Pseudo<(outs GPR:$dst), @@ -161,17 +161,17 @@ def PseudoCCADDIW : Pseudo<(outs GPR:$dst), ReadSFBALU]>; def PseudoCCSLLIW : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLIW : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAIW : Pseudo<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, + GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp index cab9bc8678a58..0920c3345ecf3 100644 --- a/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -385,6 +385,18 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant( return DataSection; } +MCSection *TargetLoweringObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, + StringRef SectionPrefix) const { + // Fallback to `getSectionForConstant` without `SectionPrefix` parameter if it + // is empty. 
+ if (SectionPrefix.empty()) + return getSectionForConstant(DL, Kind, C, Alignment); + report_fatal_error( + "TargetLoweringObjectFile::getSectionForConstant that " + "accepts SectionPrefix is not implemented for the object file format"); +} + MCSection *TargetLoweringObjectFile::getSectionForMachineBasicBlock( const Function &F, const MachineBasicBlock &MBB, const TargetMachine &TM) const { diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 79aa898e18bfa..a227afe37d737 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -20,6 +20,7 @@ #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -61,6 +62,11 @@ X86AsmPrinter::X86AsmPrinter(TargetMachine &TM, /// runOnMachineFunction - Emit the function body. 
/// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + if (auto *PSIW = getAnalysisIfAvailable()) + PSI = &PSIW->getPSI(); + if (auto *SDPIW = getAnalysisIfAvailable()) + SDPI = &SDPIW->getStaticDataProfileInfo(); + Subtarget = &MF.getSubtarget(); SMShadowTracker.startFunction(MF); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 72977923bac2b..76de7e888d985 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58823,6 +58823,8 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, uint64_t IdxVal = N->getConstantOperandVal(2); MVT SubVecVT = SubVec.getSimpleValueType(); + int VecNumElts = OpVT.getVectorNumElements(); + int SubVecNumElts = SubVecVT.getVectorNumElements(); if (Vec.isUndef() && SubVec.isUndef()) return DAG.getUNDEF(OpVT); @@ -58882,10 +58884,9 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, SubVec.getOperand(0).getSimpleValueType() == OpVT && (IdxVal != 0 || !(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) { + SDValue ExtSrc = SubVec.getOperand(0); int ExtIdxVal = SubVec.getConstantOperandVal(1); if (ExtIdxVal != 0) { - int VecNumElts = OpVT.getVectorNumElements(); - int SubVecNumElts = SubVecVT.getVectorNumElements(); SmallVector Mask(VecNumElts); // First create an identity shuffle mask. for (int i = 0; i != VecNumElts; ++i) @@ -58893,8 +58894,7 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, // Now insert the extracted portion. 
for (int i = 0; i != SubVecNumElts; ++i) Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts; - - return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask); + return DAG.getVectorShuffle(OpVT, dl, Vec, ExtSrc, Mask); } } @@ -58942,7 +58942,7 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, // If we're splatting the lower half subvector of a full vector load into the // upper half, attempt to create a subvector broadcast. // TODO: Drop hasOneUse checks. - if (IdxVal == (OpVT.getVectorNumElements() / 2) && + if ((int)IdxVal == (VecNumElts / 2) && Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits()) && (Vec.hasOneUse() || SubVec.hasOneUse())) { auto *VecLd = dyn_cast(Vec); diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp index 1bcbc7d6e6703..dfdeada476695 100644 --- a/llvm/lib/Target/X86/X86WinEHState.cpp +++ b/llvm/lib/Target/X86/X86WinEHState.cpp @@ -721,7 +721,8 @@ void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) { // enqueue it's successors to see if we can infer their states. InitialStates.insert({BB, PredState}); FinalStates.insert({BB, PredState}); - llvm::append_range(Worklist, successors(BB)); + for (BasicBlock *SuccBB : successors(BB)) + Worklist.push_back(SuccBB); } // Try to hoist stores from successors. 
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp index 5f4991b51d246..da7e9098f7544 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp @@ -100,7 +100,7 @@ void XtensaInstPrinter::printBranchTarget(const MCInst *MI, int OpNum, OS << '+'; OS << Val; } else if (MC.isExpr()) - MC.getExpr()->print(OS, &MAI, true); + MC.getExpr()->print(OS, &MAI); else llvm_unreachable("Invalid operand"); } @@ -115,7 +115,7 @@ void XtensaInstPrinter::printJumpTarget(const MCInst *MI, int OpNum, OS << '+'; OS << Val; } else if (MC.isExpr()) - MC.getExpr()->print(OS, &MAI, true); + MC.getExpr()->print(OS, &MAI); else llvm_unreachable("Invalid operand"); ; @@ -131,7 +131,7 @@ void XtensaInstPrinter::printCallOperand(const MCInst *MI, int OpNum, OS << '+'; OS << Val; } else if (MC.isExpr()) - MC.getExpr()->print(OS, &MAI, true); + MC.getExpr()->print(OS, &MAI); else llvm_unreachable("Invalid operand"); } @@ -149,7 +149,7 @@ void XtensaInstPrinter::printL32RTarget(const MCInst *MI, int OpNum, O << ". 
"; O << Value; } else if (MC.isExpr()) - MC.getExpr()->print(O, &MAI, true); + MC.getExpr()->print(O, &MAI); else llvm_unreachable("Invalid operand"); } diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index f5ae204426170..df1f6fddeba60 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -2286,8 +2286,7 @@ void CallsiteContextGraph AllCalls; AllCalls.reserve(Node->MatchingCalls.size() + 1); AllCalls.push_back(Node->Call); - AllCalls.insert(AllCalls.end(), Node->MatchingCalls.begin(), - Node->MatchingCalls.end()); + llvm::append_range(AllCalls, Node->MatchingCalls); // First see if we can partition the calls by callee function, creating new // nodes to host each set of calls calling the same callees. This is @@ -2468,9 +2467,8 @@ bool CallsiteContextGraph::partitionCallsByCallee( // The first call becomes the primary call for this caller node, and the // rest go in the matching calls list. Info->Node->setCall(Info->Calls.front()); - Info->Node->MatchingCalls.insert(Info->Node->MatchingCalls.end(), - Info->Calls.begin() + 1, - Info->Calls.end()); + llvm::append_range(Info->Node->MatchingCalls, + llvm::drop_begin(Info->Calls)); // Save the primary call to node correspondence so that we can update // the NonAllocationCallToContextNodeMap, which is being iterated in the // caller of this function. @@ -4117,8 +4115,7 @@ bool CallsiteContextGraph::assignFunctions() { // Ignore original Node if we moved all of its contexts to clones. 
if (!Node->emptyContextIds()) ClonesWorklist.push_back(Node); - ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(), - Node->Clones.end()); + llvm::append_range(ClonesWorklist, Node->Clones); // Now walk through all of the clones of this callsite Node that we need, // and determine the assignment to a corresponding clone of the current diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 82434680b8f23..938aab5879044 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -399,7 +399,7 @@ struct ThreadingPath { void push_back(BasicBlock *BB) { Path.push_back(BB); } void push_front(BasicBlock *BB) { Path.push_front(BB); } void appendExcludingFirst(const PathType &OtherPath) { - Path.insert(Path.end(), OtherPath.begin() + 1, OtherPath.end()); + llvm::append_range(Path, llvm::drop_begin(OtherPath)); } void print(raw_ostream &OS) const { diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 4f7956514b7b5..4c6f6f12d7138 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -3641,14 +3641,12 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, } // Next check all loops nested within L. 
SmallVector Worklist; - Worklist.insert(Worklist.end(), L->getSubLoops().begin(), - L->getSubLoops().end()); + llvm::append_range(Worklist, L->getSubLoops()); while (!Worklist.empty()) { auto *CurLoop = Worklist.pop_back_val(); if (!PSI->isColdBlock(CurLoop->getHeader(), BFI)) return false; - Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(), - CurLoop->getSubLoops().end()); + llvm::append_range(Worklist, CurLoop->getSubLoops()); } return true; }; diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp index baaad8bb48f33..c76b3afef50c2 100644 --- a/llvm/lib/Transforms/Utils/CodeLayout.cpp +++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp @@ -387,7 +387,7 @@ struct ChainEdge { void appendJump(JumpT *Jump) { Jumps.push_back(Jump); } void moveJumps(ChainEdge *Other) { - Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end()); + llvm::append_range(Jumps, Other->Jumps); Other->Jumps.clear(); Other->Jumps.shrink_to_fit(); } diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp index 54d46117729c9..53bcaa6d3df03 100644 --- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp +++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp @@ -672,8 +672,8 @@ class FlowAdjuster { // Concatenate the two paths std::vector Result; - Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); - Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); + llvm::append_range(Result, ForwardPath); + llvm::append_range(Result, BackwardPath); return Result; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4b4a56be19fe5..c3520dc95f8b4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3815,13 +3815,18 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF( // It is computed by MaxVF * 
sizeOf(type) * 8, where type is taken from // the memory accesses that is most restrictive (involved in the smallest // dependence distance). - unsigned MaxSafeElements = - llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType); + unsigned MaxSafeElementsPowerOf2 = + bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType); + if (!Legal->isSafeForAnyStoreLoadForwardDistances()) { + unsigned SLDist = Legal->getMaxStoreLoadForwardSafeDistanceInBits(); + MaxSafeElementsPowerOf2 = + std::min(MaxSafeElementsPowerOf2, SLDist / WidestType); + } + auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElementsPowerOf2); + auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElementsPowerOf2); - auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements); - auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements); if (!Legal->isSafeForAnyVectorWidth()) - this->MaxSafeElements = MaxSafeElements; + this->MaxSafeElements = MaxSafeElementsPowerOf2; LLVM_DEBUG(dbgs() << "LV: The max safe fixed VF is: " << MaxSafeFixedVF << ".\n"); @@ -5415,7 +5420,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) { if (!isScalarAfterVectorization(&I, VF) && !VF.isScalable() && !useEmulatedMaskMemRefHack(&I, VF) && computePredInstDiscount(&I, ScalarCosts, VF) >= 0) { - ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end()); + ScalarCostsVF.insert_range(ScalarCosts); // Check if we decided to scalarize a call. If so, update the widening // decision of the call to CM_Scalarize with the computed scalar cost. for (const auto &[I, Cost] : ScalarCosts) { @@ -7704,8 +7709,8 @@ DenseMap LoopVectorizationPlanner::executePlan( VPlanTransforms::convertToConcreteRecipes(BestVPlan); // Perform the actual loop transformation. 
- VPTransformState State(&TTI, BestVF, BestUF, LI, DT, ILV.Builder, &ILV, - &BestVPlan, OrigLoop->getParentLoop(), + VPTransformState State(&TTI, BestVF, LI, DT, ILV.Builder, &ILV, &BestVPlan, + OrigLoop->getParentLoop(), Legal->getWidestInductionType()); #ifdef EXPENSIVE_CHECKS diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 850895895d44d..a4b0378abc075 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3733,16 +3733,8 @@ class BoUpSLP { Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end()); } if (EntryState == TreeEntry::SplitVectorize) { - auto *MainOp = - cast(*find_if(Last->Scalars, IsaPred)); - auto *AltOp = cast(*find_if(Last->Scalars, [=](Value *V) { - auto *I = dyn_cast(V); - if (!I) - return false; - InstructionsState LocalS = getSameOpcode({I, MainOp}, *TLI); - return !LocalS || LocalS.isAltShuffle(); - })); - Last->setOperations(InstructionsState(MainOp, AltOp)); + assert(S && "Split nodes must have operations."); + Last->setOperations(S); SmallPtrSet Processed; for (Value *V : VL) { auto *I = dyn_cast(V); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 8b53c559f6533..1e2f70e5c103e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -216,7 +216,7 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { } VPTransformState::VPTransformState(const TargetTransformInfo *TTI, - ElementCount VF, unsigned UF, LoopInfo *LI, + ElementCount VF, LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy) diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 8e5b974d887f4..bebea1915690f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ 
b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -202,7 +202,7 @@ class VPLane { /// VPTransformState holds information passed down when "executing" a VPlan, /// needed for generating the output IR. struct VPTransformState { - VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF, + VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder, InnerLoopVectorizer *ILV, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8852540aec931..3ebd844d6a5a1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1163,35 +1163,75 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, return MadeChange; } -/// Try to simplify the branch condition of \p Plan. This may restrict the -/// resulting plan to \p BestVF and \p BestUF. -static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, - unsigned BestUF, - PredicatedScalarEvolution &PSE) { - VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); - VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock(); - auto *Term = &ExitingVPBB->back(); - // Try to simplify the branch condition if TC <= VF * UF when preparing to - // execute the plan for the main vector loop. We only do this if the - // terminator is: - // 1. BranchOnCount, or - // 2. BranchOnCond where the input is Not(ActiveLaneMask). +/// Return true if \p Cond is known to be true for given \p BestVF and \p +/// BestUF. 
+static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan, + ElementCount BestVF, unsigned BestUF, + ScalarEvolution &SE) { using namespace llvm::VPlanPatternMatch; - if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) && - !match(Term, - m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue()))))) + if (match(Cond, m_Binary(m_VPValue(), m_VPValue()))) + return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF, + &SE](VPValue *C) { + return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE); + }); + + auto *CanIV = Plan.getCanonicalIV(); + if (!match(Cond, m_Binary( + m_Specific(CanIV->getBackedgeValue()), + m_Specific(&Plan.getVectorTripCount()))) || + cast(Cond->getDefiningRecipe())->getPredicate() != + CmpInst::ICMP_EQ) return false; - ScalarEvolution &SE = *PSE.getSE(); + // The compare checks CanIV + VFxUF == vector trip count. The vector trip + // count is not conveniently available as SCEV so far, so we compare directly + // against the original trip count. This is stricter than necessary, as we + // will only return true if the trip count == vector trip count. + // TODO: Use SCEV for vector trip count once available, to cover cases where + // vector trip count == UF * VF, but original trip count != UF * VF. const SCEV *TripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE); assert(!isa(TripCount) && "Trip count SCEV must be computable"); ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF); const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements); - if (TripCount->isZero() || - !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) + return SE.isKnownPredicate(CmpInst::ICMP_EQ, TripCount, C); +} + +/// Try to simplify the branch condition of \p Plan. This may restrict the +/// resulting plan to \p BestVF and \p BestUF. 
+static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, + unsigned BestUF, + PredicatedScalarEvolution &PSE) { + VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); + VPBasicBlock *ExitingVPBB = VectorRegion->getExitingBasicBlock(); + auto *Term = &ExitingVPBB->back(); + VPValue *Cond; + ScalarEvolution &SE = *PSE.getSE(); + using namespace llvm::VPlanPatternMatch; + if (match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) || + match(Term, m_BranchOnCond( + m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue()))))) { + // Try to simplify the branch condition if TC <= VF * UF when the latch + // terminator is BranchOnCount or BranchOnCond where the input is + // Not(ActiveLaneMask). + const SCEV *TripCount = + vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE); + assert(!isa(TripCount) && + "Trip count SCEV must be computable"); + ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF); + const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements); + if (TripCount->isZero() || + !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) + return false; + } else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) { + // For BranchOnCond, check if we can prove the condition to be true using VF + // and UF. + if (!isConditionTrueViaVFAndUF(Cond, Plan, BestVF, BestUF, SE)) + return false; + } else { return false; + } // The vector loop region only executes once. 
If possible, completely remove // the region, otherwise replace the terminator controlling the latch with diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 6ddb88308955f..87c5797d9e452 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -45,7 +45,8 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) { return true; if (auto *Rep = dyn_cast(VPV)) return Rep->isUniform(); - if (isa(VPV)) + if (isa(VPV)) return all_of(VPV->getDefiningRecipe()->operands(), isUniformAfterVectorization); if (auto *VPI = dyn_cast(VPV)) @@ -53,8 +54,6 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) { ((Instruction::isBinaryOp(VPI->getOpcode()) || VPI->getOpcode() == VPInstruction::PtrAdd) && all_of(VPI->operands(), isUniformAfterVectorization)); - if (auto *IV = dyn_cast(VPV)) - return all_of(IV->operands(), isUniformAfterVectorization); // VPExpandSCEVRecipes must be placed in the entry and are alway uniform. return isa(VPV); diff --git a/llvm/test/Analysis/Lint/abort-on-error.ll b/llvm/test/Analysis/Lint/abort-on-error.ll index 3efc38aea887c..0bbbcfa9d7418 100644 --- a/llvm/test/Analysis/Lint/abort-on-error.ll +++ b/llvm/test/Analysis/Lint/abort-on-error.ll @@ -1,8 +1,8 @@ -; RUN: not opt -passes=lint -disable-output --lint-abort-on-error %s 2>&1 | FileCheck %s +; RUN: not opt -passes='lint' -disable-output %s 2>&1 | FileCheck %s ; CHECK: Undefined behavior: Division by zero ; CHECK-NEXT: %b = sdiv i32 %a, 0 -; CHECK-NEXT: LLVM ERROR: Linter found errors, aborting. (enabled by --lint-abort-on-error) +; CHECK-NEXT: LLVM ERROR: linter found errors, aborting. 
(enabled by abort-on-error) define i32 @sdiv_by_zero(i32 %a) { %b = sdiv i32 %a, 0 diff --git a/llvm/test/Analysis/Lint/const-store.ll b/llvm/test/Analysis/Lint/const-store.ll index 030a0be3aecc2..748f752b2975f 100644 --- a/llvm/test/Analysis/Lint/const-store.ll +++ b/llvm/test/Analysis/Lint/const-store.ll @@ -1,6 +1,6 @@ -; RUN: not opt --mtriple=amdgcn --passes=lint --lint-abort-on-error %s -disable-output 2>&1 | FileCheck %s +; RUN: not opt --mtriple=amdgcn --passes='lint' %s -disable-output 2>&1 | FileCheck %s ; RUN: opt --mtriple=amdgcn --mcpu=gfx1030 --passes=lint %s -disable-output 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK0 -; RUN: opt --mtriple=x86_64 --passes=lint --lint-abort-on-error %s -disable-output 2>&1 | FileCheck %s --allow-empty --check-prefix=NOERR +; RUN: opt --mtriple=x86_64 --passes='lint' %s -disable-output 2>&1 | FileCheck %s --allow-empty --check-prefix=NOERR ; NOERR: {{^$}} define amdgpu_kernel void @store_const(ptr addrspace(4) %out, i32 %a, i32 %b) { diff --git a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll index efa3100464759..8e249b36f6445 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/safe-with-dep-distance.ll @@ -4,7 +4,7 @@ ; for (i = 0; i < n; i++) ; A[i + 4] = A[i] * 2; -; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits +; CHECK: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll index ef19e173b6599..335ad67faee04 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll +++ 
b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll @@ -276,7 +276,7 @@ for.body: ; preds = %entry, %for.body define void @vectorizable_Read_Write(ptr nocapture %A) { ; CHECK-LABEL: 'vectorizable_Read_Write' ; CHECK-NEXT: for.body: -; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits +; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 64 bits, with a maximum safe store-load forward width of 64 bits ; CHECK-NEXT: Dependences: ; CHECK-NEXT: BackwardVectorizable: ; CHECK-NEXT: %0 = load i32, ptr %arrayidx, align 4 -> diff --git a/llvm/test/Bitcode/fixedpoint_type.ll b/llvm/test/Bitcode/fixedpoint_type.ll new file mode 100644 index 0000000000000..bbe1fdac9a4e6 --- /dev/null +++ b/llvm/test/Bitcode/fixedpoint_type.ll @@ -0,0 +1,29 @@ +;; This test checks generation of DIFixedPointType. + +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +;; Test whether DIFixedPointType is generated. +; CHECK: !DIFixedPointType(name: "fp__decimal", size: 32, align: 32, encoding: DW_ATE_signed_fixed, kind: Decimal, factor: -4) +; CHECK: !DIFixedPointType(name: "fp__rational", size: 32, align: 32, encoding: DW_ATE_unsigned_fixed, kind: Rational, numerator: 1234, denominator: 5678) +; CHECK: !DIFixedPointType(name: "fp__binary", size: 64, encoding: DW_ATE_unsigned_fixed, kind: Binary, factor: -16) + +; ModuleID = 'fixedpoint_type.ll' +source_filename = "/dir/fixedpoint_type.adb" + +!llvm.module.flags = !{!0, !1} +!llvm.dbg.cu = !{!2} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{i32 2, !"Dwarf Version", i32 4} +!2 = distinct !DICompileUnit(language: DW_LANG_Ada95, file: !3, producer: "GNAT/LLVM", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !4, imports: !4) +!3 = !DIFile(filename: "fixedpoint_type.adb", directory: "/dir") +!4 = !{} +!5 = !{!11, !12, !13} +!6 = distinct !DISubprogram(name: "fp", scope: !3, file: !3, line: 1, 
type: !7, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !4, retainedNodes: !9) +!7 = !DISubroutineType(types: !8) +!8 = !{null} +!9 = !{!10} +!10 = !DILocalVariable(name: "x", scope: !6, file: !3, line: 3, type: !11, align: 32) +!11 = !DIFixedPointType(name: "fp__decimal", size: 32, align: 32, encoding: DW_ATE_signed_fixed, kind: Decimal, factor: -4) +!12 = !DIFixedPointType(name: "fp__rational", size: 32, align: 32, encoding: DW_ATE_unsigned_fixed, kind: Rational, numerator: 1234, denominator: 5678) +!13 = !DIFixedPointType(name: "fp__binary", size: 64, align: 0, encoding: DW_ATE_unsigned_fixed, kind: Binary, factor: -16) diff --git a/llvm/test/CodeGen/AArch64/arm64ec-alias.ll b/llvm/test/CodeGen/AArch64/arm64ec-alias.ll new file mode 100644 index 0000000000000..03cc873136940 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64ec-alias.ll @@ -0,0 +1,42 @@ +; RUN: llc -mtriple arm64ec-windows-msvc -filetype asm -o - %s | FileCheck %s + +define void @func() { + ret void +} + +define dso_local void @patchable_func() hybrid_patchable { + ret void +} + +@func_alias = alias void (), ptr @func +@func_alias2 = alias void (), ptr @func_alias +@patchable_alias = alias void (), ptr @patchable_func + +; CHECK: .weak_anti_dep func_alias +; CHECK-NEXT: .set func_alias, "#func_alias" +; CHECK-NEXT: .weak_anti_dep func_alias2 +; CHECK-NEXT: .set func_alias2, "#func_alias2" +; CHECK-NEXT: .weak_anti_dep func +; CHECK-NEXT: .set func, "#func" +; CHECK: .weak_anti_dep patchable_alias +; CHECK-NEXT: .set patchable_alias, "#patchable_alias" + +; CHECK: .globl "#func_alias" +; CHECK-NEXT: .def "#func_alias"; +; CHECK-NEXT: .scl 2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef +; CHECK-NEXT: .set "#func_alias", "#func" +; CHECK-NEXT: .globl "#func_alias2" +; CHECK-NEXT: .def "#func_alias2"; +; CHECK-NEXT: .scl 2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef +; CHECK-NEXT: .set "#func_alias2", "#func_alias" + +; CHECK: .globl "#patchable_alias" +; CHECK-NEXT: .def 
"#patchable_alias"; +; CHECK-NEXT: .scl 2; +; CHECK-NEXT: .type 32; +; CHECK-NEXT: .endef +; CHECK-NEXT: .set "#patchable_alias", "#patchable_func" diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll index dcc675839b714..cba7a8100930f 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll @@ -35,8 +35,8 @@ declare void @no_op() nounwind; ; CHECK-NEXT: adrp x11, no_op ; CHECK-NEXT: add x11, x11, :lo12:no_op ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$v$v) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$v$v) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$v$v +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$v$v ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -82,8 +82,8 @@ declare i64 @simple_integers(i8, i16, i32, i64) nounwind; ; CHECK-NEXT: adrp x11, simple_integers ; CHECK-NEXT: add x11, x11, :lo12:simple_integers ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$i8$i8i8i8i8) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$i8$i8i8i8i8) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$i8$i8i8i8i8 +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$i8$i8i8i8i8 ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -129,8 +129,8 @@ declare double @simple_floats(float, double) nounwind; ; CHECK-NEXT: adrp x11, simple_floats ; CHECK-NEXT: add x11, x11, :lo12:simple_floats ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$d$fd) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$d$fd) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$d$fd +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$d$fd ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; 
CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -176,8 +176,8 @@ declare void @has_varargs(...) nounwind; ; CHECK-NEXT: adrp x11, has_varargs ; CHECK-NEXT: add x11, x11, :lo12:has_varargs ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$v$varargs) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$v$varargs) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$v$varargs +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$v$varargs ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -223,8 +223,8 @@ declare void @has_sret(ptr sret([100 x i8])) nounwind; ; CHECK-NEXT: adrp x11, has_sret ; CHECK-NEXT: add x11, x11, :lo12:has_sret ; CHECK-NEXT: ldr x9, [x9, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m100$v) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m100$v) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m100$v +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m100$v ; CHECK-NEXT: blr x9 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -271,8 +271,8 @@ declare void @has_aligned_sret(ptr align 32 sret(%TSRet)) nounwind; ; CHECK: adrp x11, has_aligned_sret ; CHECK: add x11, x11, :lo12:has_aligned_sret ; CHECK: ldr x9, [x9, :lo12:__os_arm64x_check_icall] -; CHECK: adrp x10, ($iexit_thunk$cdecl$m16$v) -; CHECK: add x10, x10, :lo12:($iexit_thunk$cdecl$m16$v) +; CHECK: adrp x10, $iexit_thunk$cdecl$m16$v +; CHECK: add x10, x10, :lo12:$iexit_thunk$cdecl$m16$v ; CHECK: blr x9 ; CHECK: .seh_startepilogue ; CHECK: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -325,8 +325,8 @@ declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind; ; CHECK-NEXT: adrp x11, small_array ; CHECK-NEXT: add x11, x11, :lo12:small_array ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m2$m2F8) -; CHECK-NEXT: add x10, x10, 
:lo12:($iexit_thunk$cdecl$m2$m2F8) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m2$m2F8 +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2F8 ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -382,8 +382,8 @@ declare [3 x i64] @large_array([3 x i64], [2 x double], [2 x [2 x i64]]) nounwin ; CHECK-NEXT: adrp x11, large_array ; CHECK-NEXT: add x11, x11, :lo12:large_array ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m24$m24D16m32) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m24$m24D16m32) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m24$m24D16m32 +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m24$m24D16m32 ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -446,8 +446,8 @@ declare %T2 @simple_struct(%T1, %T2, %T3, %T4) nounwind; ; CHECK-NEXT: adrp x11, simple_struct ; CHECK-NEXT: add x11, x11, :lo12:simple_struct ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m8$i8m8m16m24) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m8$i8m8m16m24) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m8$i8m8m16m24 +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m8$i8m8m16m24 ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -499,8 +499,8 @@ declare <4 x i8> @small_vector(<4 x i8> %0) nounwind; ; CHECK-NEXT: adrp x11, small_vector ; CHECK-NEXT: add x11, x11, :lo12:small_vector ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m$m) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m$m) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m$m +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m$m ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 
@@ -549,8 +549,8 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind; ; CHECK-NEXT: adrp x11, large_vector ; CHECK-NEXT: add x11, x11, :lo12:large_vector ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m16$m16) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m16$m16) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m16$m16 +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m16$m16 ; CHECK-NEXT: blr x8 ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll index 1ed6a273338ab..20ff5fc5bc5e1 100644 --- a/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-hybrid-patchable.ll @@ -81,8 +81,8 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: adrp x11, func ; CHECK-NEXT: add x11, x11, :lo12:func ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$v$v) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$v$v) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$v$v +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$v$v ; CHECK-NEXT: str x11, [sp, #8] ; CHECK-NEXT: blr x8 ; CHECK-NEXT: blr x11 @@ -111,8 +111,8 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: adrp x11, func ; CHECK-NEXT: add x11, x11, :lo12:func ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_dispatch_call] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$i8$v) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$i8$v) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$i8$v +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$i8$v ; CHECK-NEXT: adrp x9, "#func$hp_target" ; CHECK-NEXT: add x9, x9, :lo12:"#func$hp_target" ; CHECK-NEXT: blr x8 @@ -138,8 +138,8 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: adrp x11, has_varargs ; CHECK-NEXT: add x11, x11, :lo12:has_varargs ; 
CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_dispatch_call] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$v$varargs) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$v$varargs) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$v$varargs +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$v$varargs ; CHECK-NEXT: adrp x9, "#has_varargs$hp_target" ; CHECK-NEXT: add x9, x9, :lo12:"#has_varargs$hp_target" ; CHECK-NEXT: blr x8 @@ -165,8 +165,8 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: adrp x11, has_sret ; CHECK-NEXT: add x11, x11, :lo12:has_sret ; CHECK-NEXT: ldr x12, [x9, :lo12:__os_arm64x_dispatch_call] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$m100$v) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$m100$v) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$m100$v +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$m100$v ; CHECK-NEXT: adrp x9, "#has_sret$hp_target" ; CHECK-NEXT: add x9, x9, :lo12:"#has_sret$hp_target" ; CHECK-NEXT: blr x12 @@ -192,8 +192,8 @@ define dso_local void @caller() nounwind { ; CHECK-NEXT: adrp x11, exp ; CHECK-NEXT: add x11, x11, :lo12:exp ; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_dispatch_call] -; CHECK-NEXT: adrp x10, ($iexit_thunk$cdecl$v$v) -; CHECK-NEXT: add x10, x10, :lo12:($iexit_thunk$cdecl$v$v) +; CHECK-NEXT: adrp x10, $iexit_thunk$cdecl$v$v +; CHECK-NEXT: add x10, x10, :lo12:$iexit_thunk$cdecl$v$v ; CHECK-NEXT: adrp x9, "#exp$hp_target" ; CHECK-NEXT: add x9, x9, :lo12:"#exp$hp_target" ; CHECK-NEXT: blr x8 diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll new file mode 100644 index 0000000000000..195c740022d10 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll @@ -0,0 +1,273 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s 
--check-prefixes=CHECK,CHECK-GI + +define <4 x i16> @z_i32_v4i16(i32 %x) { +; CHECK-SD-LABEL: z_i32_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: z_i32_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b3, v0.b[2] +; CHECK-GI-NEXT: mov b0, v0.b[3] +; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] +; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] +; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret + %b = bitcast i32 %x to <4 x i8> + %e = zext <4 x i8> %b to <4 x i16> + ret <4 x i16> %e +} + +define <4 x i32> @z_i32_v4i32(i32 %x) { +; CHECK-SD-LABEL: z_i32_v4i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff +; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: z_i32_v4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: mov b1, v0.b[2] +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: uxtb w8, w8 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: uxtb w9, w9 +; CHECK-GI-NEXT: uxtb w10, w10 +; CHECK-GI-NEXT: uxtb w11, w11 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov v0.h[1], w10 +; CHECK-GI-NEXT: mov v1.h[1], w11 +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ret + %b = bitcast i32 %x to <4 x i8> + %e = zext <4 x i8> %b to <4 x i32> + ret <4 x i32> %e +} + +define <4 x i64> @z_i32_v4i64(i32 %x) { 
+; CHECK-SD-LABEL: z_i32_v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff +; CHECK-SD-NEXT: umov w8, v0.b[2] +; CHECK-SD-NEXT: umov w9, v0.b[0] +; CHECK-SD-NEXT: umov w10, v0.b[3] +; CHECK-SD-NEXT: umov w11, v0.b[1] +; CHECK-SD-NEXT: fmov s0, w9 +; CHECK-SD-NEXT: fmov s2, w8 +; CHECK-SD-NEXT: mov v0.s[1], w11 +; CHECK-SD-NEXT: mov v2.s[1], w10 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: and v1.16b, v2.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: z_i32_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: mov b1, v0.b[2] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: ubfx x8, x8, #0, #8 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mov v0.d[0], x8 +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: ubfx x9, x9, #0, #8 +; CHECK-GI-NEXT: ubfx x8, x8, #0, #8 +; CHECK-GI-NEXT: mov v1.d[0], x9 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ubfx x9, x9, #0, #8 +; CHECK-GI-NEXT: mov v1.d[1], x9 +; CHECK-GI-NEXT: ret + %b = bitcast i32 %x to <4 x i8> + %e = zext <4 x i8> %b to <4 x i64> + ret <4 x i64> %e +} + +define <4 x i16> @s_i32_v4i16(i32 %x) { +; CHECK-SD-LABEL: s_i32_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: s_i32_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: mov b1, v0.b[1] +; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b3, v0.b[2] +; CHECK-GI-NEXT: mov b0, v0.b[3] +; CHECK-GI-NEXT: mov v2.b[1], v1.b[0] +; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] +; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] +; CHECK-GI-NEXT: sshll v0.8h, v2.8b, #0 +; CHECK-GI-NEXT: // kill: 
def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret + %b = bitcast i32 %x to <4 x i8> + %e = sext <4 x i8> %b to <4 x i16> + ret <4 x i16> %e +} + +define <4 x i32> @s_i32_v4i32(i32 %x) { +; CHECK-SD-LABEL: s_i32_v4i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: s_i32_v4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: mov b1, v0.b[2] +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: sxtb w8, w8 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov w11, s3 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: sxtb w9, w9 +; CHECK-GI-NEXT: sxtb w10, w10 +; CHECK-GI-NEXT: sxtb w11, w11 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov v0.h[1], w10 +; CHECK-GI-NEXT: mov v1.h[1], w11 +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: ret + %b = bitcast i32 %x to <4 x i8> + %e = sext <4 x i8> %b to <4 x i32> + ret <4 x i32> %e +} + +define <4 x i64> @s_i32_v4i64(i32 %x) { +; CHECK-SD-LABEL: s_i32_v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: umov w8, v0.b[2] +; CHECK-SD-NEXT: umov w9, v0.b[0] +; CHECK-SD-NEXT: umov w10, v0.b[3] +; CHECK-SD-NEXT: umov w11, v0.b[1] +; CHECK-SD-NEXT: fmov s0, w9 +; CHECK-SD-NEXT: fmov s1, w8 +; CHECK-SD-NEXT: mov v0.s[1], w11 +; CHECK-SD-NEXT: mov v1.s[1], w10 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56 +; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #56 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: s_i32_v4i64: +; CHECK-GI: // %bb.0: +; 
CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: mov b1, v0.b[2] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v0.b[3] +; CHECK-GI-NEXT: sxtb x8, w8 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: mov v0.d[0], x8 +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: sxtb x9, w9 +; CHECK-GI-NEXT: sxtb x8, w8 +; CHECK-GI-NEXT: mov v1.d[0], x9 +; CHECK-GI-NEXT: fmov w9, s3 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: sxtb x9, w9 +; CHECK-GI-NEXT: mov v1.d[1], x9 +; CHECK-GI-NEXT: ret + %b = bitcast i32 %x to <4 x i8> + %e = sext <4 x i8> %b to <4 x i64> + ret <4 x i64> %e +} + +define void @extractbitcastext(i32 %bytes, ptr %output) { +; CHECK-LABEL: extractbitcastext: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: ret + %conv = sext i32 %bytes to i64 + %b0 = bitcast i64 %conv to <8 x i8> + %b1 = zext <8 x i8> %b0 to <8 x i16> + %shuffle.i = shufflevector <8 x i16> %b1, <8 x i16> poison, <4 x i32> + %z2 = zext nneg <4 x i16> %shuffle.i to <4 x i32> + %shuffle.i23 = shufflevector <4 x i32> %z2, <4 x i32> poison, <2 x i32> + %z3 = zext nneg <2 x i32> %shuffle.i23 to <2 x i64> + %shuffle.i24 = shufflevector <4 x i32> %z2, <4 x i32> poison, <2 x i32> + %z4 = zext nneg <2 x i32> %shuffle.i24 to <2 x i64> + store <2 x i64> %z3, ptr %output, align 8 + %add.ptr = getelementptr inbounds nuw i8, ptr %output, i64 16 + store <2 x i64> %z4, ptr %add.ptr, align 8 + ret void +} + +define void @extractbitcastext_s(i32 %bytes, ptr %output) { +; CHECK-LABEL: extractbitcastext_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x8, w0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v0.4s, v0.4h, 
#0 +; CHECK-NEXT: sshll v1.2d, v0.2s, #0 +; CHECK-NEXT: sshll2 v0.2d, v0.4s, #0 +; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: ret + %conv = sext i32 %bytes to i64 + %b0 = bitcast i64 %conv to <8 x i8> + %b1 = sext <8 x i8> %b0 to <8 x i16> + %shuffle.i = shufflevector <8 x i16> %b1, <8 x i16> poison, <4 x i32> + %s2 = sext <4 x i16> %shuffle.i to <4 x i32> + %shuffle.i23 = shufflevector <4 x i32> %s2, <4 x i32> poison, <2 x i32> + %s3 = sext <2 x i32> %shuffle.i23 to <2 x i64> + %shuffle.i24 = shufflevector <4 x i32> %s2, <4 x i32> poison, <2 x i32> + %s4 = sext <2 x i32> %shuffle.i24 to <2 x i64> + store <2 x i64> %s3, ptr %output, align 8 + %add.ptr = getelementptr inbounds nuw i8, ptr %output, i64 16 + store <2 x i64> %s4, ptr %add.ptr, align 8 + ret void +} + + diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll new file mode 100644 index 0000000000000..ab627b02a1bc7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll @@ -0,0 +1,172 @@ +; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Repeat the RUN command above for big-endian systems. +; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; Tests that constant pool hotness is aggregated across the module. The +; static-data-splitter processes data from cold_func first, unprofiled_func +; secondly, and then hot_func. Specifically, tests that +; - If a constant is accessed by hot functions, all constant pools for this +; constant (e.g., from an unprofiled function, or cold function) should have +; `.hot` suffix. 
For instance, double 0.68 is seen by both @cold_func and +; @hot_func, so two CPI emits (under label LCPI0_0 and LCPI2_0) have `.hot` +; suffix. +; - Similarly if a constant is accessed by both cold function and un-profiled +; function, constant pools for this constant should not have `.unlikely` suffix. + +;; Constant pools for function @cold_func. +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005 +; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 8 // 0x8 +; CHECK-NEXT: .byte 12 // 0xc +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff + +;; Constant pools for function @unprofiled_func +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 8 // 0x8 +; CHECK-NEXT: .byte 12 // 0xc +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .byte 255 // 0xff +; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .word 2 // 0x2 +; CHECK-NEXT: .word 3 // 0x3 +; CHECK-NEXT: .word 5 // 0x5 +; CHECK-NEXT: .word 7 // 0x7 +; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_2: +; CHECK-NEXT: .word 442 // 0x1ba +; CHECK-NEXT: .word 100 // 0x64 +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 0 // 0x0 + +;; Constant pools for function @hot_func +; CHECK: .section 
.rodata.cst8.hot,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI2_1: +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 100 // 0x64 +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 442 // 0x1ba +; CHECK-NEXT: .LCPI2_2: +; CHECK-NEXT: .word 442 // 0x1ba +; CHECK-NEXT: .word 100 // 0x64 +; CHECK-NEXT: .word 0 // 0x0 +; CHECK-NEXT: .word 0 // 0x0 + +;; For global variable @val +;; The section name remains `.rodata.cst32` without hotness prefix because +;; the variable has external linkage and not analyzed. Compiler need symbolized +;; data access profiles to annotate such global variables' hotness. +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK-NEXT: .globl val + +define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) + %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01) + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = bitcast <8 x i8> %t1 to <2 x i32> + %3 = extractelement <2 x i32> %t2, i32 1 + %sum = add i32 %2, %3 + %ret = add i32 %sum, %num + ret i32 %ret +} + +declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) +declare i32 @func_taking_arbitrary_param(...) + +define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) { + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> ) + %t2 = bitcast <8 x i8> %t1 to <4 x i16> + %t3 = zext <4 x i16> %t2 to <4 x i32> + %t4 = add <4 x i32> %t3, + %cmp = icmp ule <4 x i32> , %t4 + ret <4 x i1> %cmp +} + +define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { + %2 = tail call i32 (...) 
@func_taking_arbitrary_param(double 6.800000e-01) + %b = add <4 x i32> , %a + %c = icmp ule <4 x i32> %b, + ret <4 x i1> %c +} + +@val = unnamed_addr constant i256 1 + +define i32 @main(i32 %0, ptr %1) !prof !16 { + br label %7 + +5: ; preds = %7 + %x = call double @double_func() + %a = call <16 x i8> @vector_func_16i8() + %b = call <16 x i8> @vector_func_16i8() + call void @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) + ret i32 0 + +7: ; preds = %7, %2 + %8 = phi i32 [ 0, %2 ], [ %10, %7 ] + %seed_val = load i256, ptr @val + %9 = call i32 @seed(i256 %seed_val) + call void @hot_func(i32 %9) + %10 = add i32 %8, 1 + %11 = icmp eq i32 %10, 100000 + br i1 %11, label %5, label %7, !prof !18 +} + +declare i32 @seed(i256) +declare double @double_func() +declare <4 x i32> @vector_func() +declare <16 x i8> @vector_func_16i8() + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460617} +!5 = !{!"MaxCount", i64 849536} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849536} +!8 = !{!"NumCounts", i64 23784} +!9 = !{!"NumFunctions", i64 3301} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15} +!14 = !{i32 990000, i64 166, i32 73} +!15 = !{i32 999999, i64 3, i32 1463} +!16 = !{!"function_entry_count", i64 1} +!17 = !{!"function_entry_count", i64 100000} +!18 = !{!"branch_weights", i32 1, i32 99999} diff --git a/llvm/test/CodeGen/AArch64/dllexport.ll b/llvm/test/CodeGen/AArch64/dllexport.ll index 580fb5fd9e79e..e15fc0a928b66 100644 --- a/llvm/test/CodeGen/AArch64/dllexport.ll +++ b/llvm/test/CodeGen/AArch64/dllexport.ll @@ -88,10 +88,10 @@ define weak_odr dllexport void @l() { ; CHECK-GNU-EC: .ascii " -export:o,data" ; CHECK-GNU-EC: .ascii " -export:p,data" ; CHECK-GNU-EC: .ascii " -export:q,data" -; CHECK-GNU-EC: .ascii " 
-export:r" -; CHECK-GNU-EC: .ascii " -export:s" -; CHECK-GNU-EC: .ascii " -export:t" -; CHECK-GNU-EC: .ascii " -export:u" +; CHECK-GNU-EC: .ascii " -export:#r,EXPORTAS,r" +; CHECK-GNU-EC: .ascii " -export:#s,EXPORTAS,s" +; CHECK-GNU-EC: .ascii " -export:#t,EXPORTAS,t" +; CHECK-GNU-EC: .ascii " -export:#u,EXPORTAS,u" ; CHECK-MSVC-EC-NOT: /EXPORT:f ; CHECK-MSVC-EC-NOT: /EXPORT:#f,EXPORTAS,f ; CHECK-MSVC-EC: .ascii " /EXPORT:#g,EXPORTAS,g" @@ -106,7 +106,7 @@ define weak_odr dllexport void @l() { ; CHECK-MSVC-EC: .ascii " /EXPORT:o,DATA" ; CHECK-MSVC-EC: .ascii " /EXPORT:p,DATA" ; CHECK-MSVC-EC: .ascii " /EXPORT:q,DATA" -; CHECK-MSVC-EC: .ascii " /EXPORT:r" -; CHECK-MSVC-EC: .ascii " /EXPORT:s" -; CHECK-MSVC-EC: .ascii " /EXPORT:t" -; CHECK-MSVC-EC: .ascii " /EXPORT:u" +; CHECK-MSVC-EC: .ascii " /EXPORT:#r,EXPORTAS,r" +; CHECK-MSVC-EC: .ascii " /EXPORT:#s,EXPORTAS,s" +; CHECK-MSVC-EC: .ascii " /EXPORT:#t,EXPORTAS,t" +; CHECK-MSVC-EC: .ascii " /EXPORT:#u,EXPORTAS,u" diff --git a/llvm/test/CodeGen/AArch64/pr58516.ll b/llvm/test/CodeGen/AArch64/pr58516.ll index 3361ded48d4e2..d1775a2e707b6 100644 --- a/llvm/test/CodeGen/AArch64/pr58516.ll +++ b/llvm/test/CodeGen/AArch64/pr58516.ll @@ -56,7 +56,7 @@ define void @osfx(ptr %this) comdat personality ptr @__CxxFrameHandler3 { ; CHECK-NEXT: ret ; CHECK-NEXT: .seh_endfunclet ; CHECK-NEXT: .seh_handlerdata -; CHECK-NEXT: .word ($cppxdata$osfx)@IMGREL +; CHECK-NEXT: .word $cppxdata$osfx@IMGREL ; CHECK-NEXT: .section .text,"xr",discard,osfx ; CHECK-NEXT: .seh_endproc ; CHECK-NEXT: .def "?catch$3@?0?osfx@4HA"; diff --git a/llvm/test/CodeGen/AArch64/win-catchpad-nested-cxx.ll b/llvm/test/CodeGen/AArch64/win-catchpad-nested-cxx.ll index 6d0e9d6929709..0203c337cc68b 100644 --- a/llvm/test/CodeGen/AArch64/win-catchpad-nested-cxx.ll +++ b/llvm/test/CodeGen/AArch64/win-catchpad-nested-cxx.ll @@ -45,12 +45,12 @@ handler2: ; CHECK-LABEL: $cppxdata$try_in_catch: ; CHECK-NEXT: .word 429065506 ; CHECK-NEXT: .word 4 -; CHECK-NEXT: .word 
($stateUnwindMap$try_in_catch) +; CHECK-NEXT: .word $stateUnwindMap$try_in_catch ; CHECK-NEXT: .word 2 -; CHECK-NEXT: .word ($tryMap$try_in_catch) +; CHECK-NEXT: .word $tryMap$try_in_catch ; ip2state num + ptr ; CHECK-NEXT: .word 7 -; CHECK-NEXT: .word ($ip2state$try_in_catch) +; CHECK-NEXT: .word $ip2state$try_in_catch ; unwindhelp offset ; CHECK-NEXT: .word -16 ; CHECK-NEXT: .word 0 @@ -62,12 +62,12 @@ handler2: ; CHECK-NEXT: .word 0 ; CHECK-NEXT: .word 3 ; CHECK-NEXT: .word 1 -; CHECK-NEXT: .word ($handlerMap$0$try_in_catch) +; CHECK-NEXT: .word $handlerMap$0$try_in_catch ; CHECK-NEXT: .word 2 ; CHECK-NEXT: .word 2 ; CHECK-NEXT: .word 3 ; CHECK-NEXT: .word 1 -; CHECK-NEXT: .word ($handlerMap$1$try_in_catch) +; CHECK-NEXT: .word $handlerMap$1$try_in_catch ; CHECK: $handlerMap$0$try_in_catch: ; CHECK-NEXT: .word 64 diff --git a/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll b/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll index 1f30865c98e19..3f7df585c52b4 100644 --- a/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll +++ b/llvm/test/CodeGen/AArch64/wineh-catchret-label-generation.ll @@ -35,7 +35,7 @@ define fastcc ptr @test_function(i1 %0, ptr %_Fmtfl.i.i, i1 %1) personality ptr ; CHECK-NEXT: ret ; CHECK-NEXT: .seh_endfunclet ; CHECK-NEXT: .seh_handlerdata -; CHECK-NEXT: .word ($cppxdata$test_function)@IMGREL +; CHECK-NEXT: .word $cppxdata$test_function@IMGREL ; CHECK-NEXT: .text ; CHECK-NEXT: .seh_endproc ; CHECK-NEXT: .def "?catch$5@?0?test_function@4HA"; diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll index c3b5a8968d7bb..e10b05e2488fd 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll @@ -76,11 +76,11 @@ ; CHECK-LABEL: "$cppxdata$?func@@YAHXZ": ; CHECK-NEXT: .word 429065506 // MagicNumber ; CHECK-NEXT: .word 2 // MaxState -; CHECK-NEXT: .word ("$stateUnwindMap$?func@@YAHXZ")@IMGREL // UnwindMap +; 
CHECK-NEXT: .word "$stateUnwindMap$?func@@YAHXZ"@IMGREL // UnwindMap ; CHECK-NEXT: .word 1 // NumTryBlocks -; CHECK-NEXT: .word ("$tryMap$?func@@YAHXZ")@IMGREL // TryBlockMap +; CHECK-NEXT: .word "$tryMap$?func@@YAHXZ"@IMGREL // TryBlockMap ; CHECK-NEXT: .word 4 // IPMapEntries -; CHECK-NEXT: .word ("$ip2state$?func@@YAHXZ")@IMGREL // IPToStateXData +; CHECK-NEXT: .word "$ip2state$?func@@YAHXZ"@IMGREL // IPToStateXData ; CHECK-NEXT: .word -16 // UnwindHelp ; UNWIND: Function: ?func@@YAHXZ (0x0) diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll index 0e16ea10c019a..c7a20055a70d4 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll @@ -155,19 +155,19 @@ declare void @undef_func() ; GCN-LABEL: {{^}}kernel_call_undef_func: ; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0) -; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4 +; GFX90A: .amdhsa_accum_offset (((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4)/4)-1)&~65536)&63)+1)*4 ; GCN: .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr) ; GCN: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr) ; GCN: NumVgprs: kernel_call_undef_func.num_vgpr ; GCN: NumAgprs: kernel_call_undef_func.num_agpr ; GCN: TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr) -; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1 -; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1 +; GFX908: VGPRBlocks: (alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4)/4)-1 +; GFX90A: VGPRBlocks: 
(alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8)/8)-1 ; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0) -; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4 -; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)) -; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)) -; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63 +; GFX90A: AccumOffset: ((alignto(max(1, kernel_call_undef_func.num_vgpr), 4)/4)-1+1)*4 +; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)) +; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)) +; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: (((alignto(max(1, kernel_call_undef_func.num_vgpr), 4)/4)-1)&~65536)&63 define amdgpu_kernel void @kernel_call_undef_func() #0 { bb: call void @undef_func() diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll 
b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll index 1d49e005234e3..9de6aea9385df 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll @@ -9,8 +9,8 @@ ; ALL-LABEL: {{^}}kernel: ; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel.num_agpr, kernel.num_vgpr), 1, 0) -; ALL-NEXT: .amdhsa_next_free_sgpr (max(kernel.numbered_sgpr+(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1)) -; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, kernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4 +; ALL-NEXT: .amdhsa_next_free_sgpr max(kernel.numbered_sgpr+extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1), 1, 0)-extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1) +; GFX90A-NEXT: .amdhsa_accum_offset (((((alignto(max(1, kernel.num_vgpr), 4)/4)-1)&~65536)&63)+1)*4 ; ALL: .set kernel.num_vgpr, max(41, .Laliasee_default.num_vgpr) ; ALL-NEXT: .set kernel.num_agpr, max(0, .Laliasee_default.num_agpr) diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll index cbc8e7882c45e..fe27859eb0afd 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: {{^}}kernel1: ; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel1.num_agpr, kernel1.num_vgpr), 1, 0) -; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel1.numbered_sgpr+(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1)) +; CHECK-NEXT: .amdhsa_next_free_sgpr max(kernel1.numbered_sgpr+extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1), 1, 0)-extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1) ; CHECK: .set kernel1.num_vgpr, max(42, .Laliasee_vgpr32_sgpr76.num_vgpr) ; CHECK-NEXT: .set kernel1.num_agpr, 
max(0, .Laliasee_vgpr32_sgpr76.num_agpr) diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll index cdefbab93c62d..35b67351e85dd 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: {{^}}kernel2: ; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel2.num_agpr, kernel2.num_vgpr), 1, 0) -; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel2.numbered_sgpr+(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1)) +; CHECK-NEXT: .amdhsa_next_free_sgpr max(kernel2.numbered_sgpr+extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1), 1, 0)-extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1) ; CHECK: .set kernel2.num_vgpr, max(41, .Laliasee_vgpr64_sgpr102.num_vgpr) ; CHECK-NEXT: .set kernel2.num_agpr, max(0, .Laliasee_vgpr64_sgpr102.num_agpr) diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll index 43dd0a7233604..3674d740b987b 100644 --- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll +++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: {{^}}kernel3: ; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel3.num_agpr, kernel3.num_vgpr), 1, 0) -; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel3.numbered_sgpr+(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1)) +; CHECK-NEXT: .amdhsa_next_free_sgpr max(kernel3.numbered_sgpr+extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1), 1, 0)-extrasgprs(kernel3.uses_vcc, kernel3.uses_flat_scratch, 1) ; CHECK: .set kernel3.num_vgpr, max(41, .Laliasee_vgpr256_sgpr102.num_vgpr) ; CHECK-NEXT: .set kernel3.num_agpr, max(0, .Laliasee_vgpr256_sgpr102.num_agpr) diff --git 
a/llvm/test/CodeGen/AMDGPU/dpp_combine-true16.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine-true16.mir new file mode 100644 index 0000000000000..792acda60620e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine-true16.mir @@ -0,0 +1,27 @@ +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -mattr=+real-true16 -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN +# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -mattr=+real-true16 -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN +# XUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -mattr=+real-true16 -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150 + +# FIXME-TRUE16 add gfx1200 runline when we have those true16 instructions supported + +--- + +# V_MOV_B16_t16_e64_dpp is unsupported to combine +# GCN-label: name: vop3_u16 +# GCN: %4:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %1, 0, 1, 15, 15, 1, implicit $exec +# GCN: %6:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %5, 0, 1, 15, 15, 1, implicit $exec +name: vop3_u16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:vgpr_16 = COPY $vgpr0 + %1:vgpr_16 = COPY $vgpr1 + %2:vgpr_16 = COPY $vgpr2 + %3:vgpr_16 = IMPLICIT_DEF + %4:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %1, 0, 1, 15, 15, 1, implicit $exec + %5:vgpr_16 = V_ADD_NC_U16_t16_e64 0, %4, 0, %3, 0, 0, implicit $exec + %6:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %5, 0, 1, 15, 15, 1, implicit $exec + %7:vgpr_16 = V_ADD_NC_U16_t16_e64 4, %6, 8, %5, 0, 0, implicit $exec +... 
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll index 5162092f78aca..926c2a3f12aab 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll @@ -1,7 +1,9 @@ -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN -; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX9GFX10 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX9GFX10 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX11-TRUE16 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX11-FAKE16 ; GCN-LABEL: {{^}}dpp_add: ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]], @@ -63,6 +65,30 @@ define amdgpu_kernel void @dpp_mul(ptr addrspace(1) %arg) { ret void } +; It is not expected to see a sequence of v_mov_b32_dpp feeding into a 16 bit instruction +; GCN-LABEL: {{^}}dpp_fadd_f16: +; GFX9GFX10: global_load_{{dword|b32}} [[V:v[0-9]+]], +; GFX9GFX10: v_add_f16_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}} +; GFX11-TRUE16: v_mov_b32_dpp {{v[0-9]+}}, {{v[0-9]+}} quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 
+; GFX11-TRUE16: v_add_f16_e32 +; GFX11-FAKE16: global_load_{{dword|b32}} [[V:v[0-9]+]], +; GFX11-FAKE16: v_add_f16_e64_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 +define amdgpu_kernel void @dpp_fadd_f16(ptr addrspace(1) %arg) { + %id = tail call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i32 %id + %load = load i32, ptr addrspace(1) %gep + %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0 + %tmp01 = trunc i32 %tmp0 to i16 + %tmp1 = bitcast i16 %tmp01 to half + %tt = trunc i32 %load to i16 + %t = bitcast i16 %tt to half + %add = fadd half %tmp1, %t + %tmp2 = bitcast half %add to i16 + %tmp3 = zext i16 %tmp2 to i32 + store i32 %tmp3, ptr addrspace(1) %gep + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0 declare float @llvm.ceil.f32(float) diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll index 512d58d3f996d..e152f2ddd5253 100644 --- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll @@ -24,7 +24,7 @@ define void @use_vcc() #1 { ; GCN: .set indirect_use_vcc.num_vgpr, max(41, use_vcc.num_vgpr) ; GCN: .set indirect_use_vcc.num_agpr, max(0, use_vcc.num_agpr) ; GCN: .set indirect_use_vcc.numbered_sgpr, max(34, use_vcc.numbered_sgpr) -; GCN: .set indirect_use_vcc.private_seg_size, 16+(max(use_vcc.private_seg_size)) +; GCN: .set indirect_use_vcc.private_seg_size, 16+max(use_vcc.private_seg_size) ; GCN: .set indirect_use_vcc.uses_vcc, or(1, use_vcc.uses_vcc) ; GCN: .set indirect_use_vcc.uses_flat_scratch, or(0, use_vcc.uses_flat_scratch) ; GCN: .set indirect_use_vcc.has_dyn_sized_stack, or(0, use_vcc.has_dyn_sized_stack) @@ -42,7 +42,7 @@ define void @indirect_use_vcc() #1 { ; GCN: .set indirect_2level_use_vcc_kernel.num_vgpr, 
max(32, indirect_use_vcc.num_vgpr) ; GCN: .set indirect_2level_use_vcc_kernel.num_agpr, max(0, indirect_use_vcc.num_agpr) ; GCN: .set indirect_2level_use_vcc_kernel.numbered_sgpr, max(33, indirect_use_vcc.numbered_sgpr) -; GCN: .set indirect_2level_use_vcc_kernel.private_seg_size, 0+(max(indirect_use_vcc.private_seg_size)) +; GCN: .set indirect_2level_use_vcc_kernel.private_seg_size, 0+max(indirect_use_vcc.private_seg_size) ; GCN: .set indirect_2level_use_vcc_kernel.uses_vcc, or(1, indirect_use_vcc.uses_vcc) ; GCN: .set indirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, indirect_use_vcc.uses_flat_scratch) ; GCN: .set indirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, indirect_use_vcc.has_dyn_sized_stack) @@ -78,7 +78,7 @@ define void @use_flat_scratch() #1 { ; GCN: .set indirect_use_flat_scratch.num_vgpr, max(41, use_flat_scratch.num_vgpr) ; GCN: .set indirect_use_flat_scratch.num_agpr, max(0, use_flat_scratch.num_agpr) ; GCN: .set indirect_use_flat_scratch.numbered_sgpr, max(34, use_flat_scratch.numbered_sgpr) -; GCN: .set indirect_use_flat_scratch.private_seg_size, 16+(max(use_flat_scratch.private_seg_size)) +; GCN: .set indirect_use_flat_scratch.private_seg_size, 16+max(use_flat_scratch.private_seg_size) ; GCN: .set indirect_use_flat_scratch.uses_vcc, or(1, use_flat_scratch.uses_vcc) ; GCN: .set indirect_use_flat_scratch.uses_flat_scratch, or(0, use_flat_scratch.uses_flat_scratch) ; GCN: .set indirect_use_flat_scratch.has_dyn_sized_stack, or(0, use_flat_scratch.has_dyn_sized_stack) @@ -96,7 +96,7 @@ define void @indirect_use_flat_scratch() #1 { ; GCN: .set indirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, indirect_use_flat_scratch.num_vgpr) ; GCN: .set indirect_2level_use_flat_scratch_kernel.num_agpr, max(0, indirect_use_flat_scratch.num_agpr) ; GCN: .set indirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, indirect_use_flat_scratch.numbered_sgpr) -; GCN: .set indirect_2level_use_flat_scratch_kernel.private_seg_size, 
0+(max(indirect_use_flat_scratch.private_seg_size)) +; GCN: .set indirect_2level_use_flat_scratch_kernel.private_seg_size, 0+max(indirect_use_flat_scratch.private_seg_size) ; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, indirect_use_flat_scratch.uses_vcc) ; GCN: .set indirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, indirect_use_flat_scratch.uses_flat_scratch) ; GCN: .set indirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, indirect_use_flat_scratch.has_dyn_sized_stack) @@ -133,7 +133,7 @@ define void @use_10_vgpr() #1 { ; GCN: .set indirect_use_10_vgpr.num_vgpr, max(41, use_10_vgpr.num_vgpr) ; GCN: .set indirect_use_10_vgpr.num_agpr, max(0, use_10_vgpr.num_agpr) ; GCN: .set indirect_use_10_vgpr.numbered_sgpr, max(34, use_10_vgpr.numbered_sgpr) -; GCN: .set indirect_use_10_vgpr.private_seg_size, 16+(max(use_10_vgpr.private_seg_size)) +; GCN: .set indirect_use_10_vgpr.private_seg_size, 16+max(use_10_vgpr.private_seg_size) ; GCN: .set indirect_use_10_vgpr.uses_vcc, or(1, use_10_vgpr.uses_vcc) ; GCN: .set indirect_use_10_vgpr.uses_flat_scratch, or(0, use_10_vgpr.uses_flat_scratch) ; GCN: .set indirect_use_10_vgpr.has_dyn_sized_stack, or(0, use_10_vgpr.has_dyn_sized_stack) @@ -151,7 +151,7 @@ define void @indirect_use_10_vgpr() #0 { ; GCN: .set indirect_2_level_use_10_vgpr.num_vgpr, max(32, indirect_use_10_vgpr.num_vgpr) ; GCN: .set indirect_2_level_use_10_vgpr.num_agpr, max(0, indirect_use_10_vgpr.num_agpr) ; GCN: .set indirect_2_level_use_10_vgpr.numbered_sgpr, max(33, indirect_use_10_vgpr.numbered_sgpr) -; GCN: .set indirect_2_level_use_10_vgpr.private_seg_size, 0+(max(indirect_use_10_vgpr.private_seg_size)) +; GCN: .set indirect_2_level_use_10_vgpr.private_seg_size, 0+max(indirect_use_10_vgpr.private_seg_size) ; GCN: .set indirect_2_level_use_10_vgpr.uses_vcc, or(1, indirect_use_10_vgpr.uses_vcc) ; GCN: .set indirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, indirect_use_10_vgpr.uses_flat_scratch) ; GCN: .set 
indirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, indirect_use_10_vgpr.has_dyn_sized_stack) @@ -187,7 +187,7 @@ define void @use_50_vgpr() #1 { ; GCN: .set indirect_use_50_vgpr.num_vgpr, max(41, use_50_vgpr.num_vgpr) ; GCN: .set indirect_use_50_vgpr.num_agpr, max(0, use_50_vgpr.num_agpr) ; GCN: .set indirect_use_50_vgpr.numbered_sgpr, max(34, use_50_vgpr.numbered_sgpr) -; GCN: .set indirect_use_50_vgpr.private_seg_size, 16+(max(use_50_vgpr.private_seg_size)) +; GCN: .set indirect_use_50_vgpr.private_seg_size, 16+max(use_50_vgpr.private_seg_size) ; GCN: .set indirect_use_50_vgpr.uses_vcc, or(1, use_50_vgpr.uses_vcc) ; GCN: .set indirect_use_50_vgpr.uses_flat_scratch, or(0, use_50_vgpr.uses_flat_scratch) ; GCN: .set indirect_use_50_vgpr.has_dyn_sized_stack, or(0, use_50_vgpr.has_dyn_sized_stack) @@ -223,7 +223,7 @@ define void @use_80_sgpr() #1 { ; GCN: .set indirect_use_80_sgpr.num_vgpr, max(41, use_80_sgpr.num_vgpr) ; GCN: .set indirect_use_80_sgpr.num_agpr, max(0, use_80_sgpr.num_agpr) ; GCN: .set indirect_use_80_sgpr.numbered_sgpr, max(34, use_80_sgpr.numbered_sgpr) -; GCN: .set indirect_use_80_sgpr.private_seg_size, 16+(max(use_80_sgpr.private_seg_size)) +; GCN: .set indirect_use_80_sgpr.private_seg_size, 16+max(use_80_sgpr.private_seg_size) ; GCN: .set indirect_use_80_sgpr.uses_vcc, or(1, use_80_sgpr.uses_vcc) ; GCN: .set indirect_use_80_sgpr.uses_flat_scratch, or(0, use_80_sgpr.uses_flat_scratch) ; GCN: .set indirect_use_80_sgpr.has_dyn_sized_stack, or(0, use_80_sgpr.has_dyn_sized_stack) @@ -241,7 +241,7 @@ define void @indirect_use_80_sgpr() #1 { ; GCN: .set indirect_2_level_use_80_sgpr.num_vgpr, max(32, indirect_use_80_sgpr.num_vgpr) ; GCN: .set indirect_2_level_use_80_sgpr.num_agpr, max(0, indirect_use_80_sgpr.num_agpr) ; GCN: .set indirect_2_level_use_80_sgpr.numbered_sgpr, max(33, indirect_use_80_sgpr.numbered_sgpr) -; GCN: .set indirect_2_level_use_80_sgpr.private_seg_size, 0+(max(indirect_use_80_sgpr.private_seg_size)) +; GCN: .set 
indirect_2_level_use_80_sgpr.private_seg_size, 0+max(indirect_use_80_sgpr.private_seg_size) ; GCN: .set indirect_2_level_use_80_sgpr.uses_vcc, or(1, indirect_use_80_sgpr.uses_vcc) ; GCN: .set indirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, indirect_use_80_sgpr.uses_flat_scratch) ; GCN: .set indirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, indirect_use_80_sgpr.has_dyn_sized_stack) @@ -297,7 +297,7 @@ define void @use_stack1() #1 { ; GCN: .set indirect_use_stack.num_vgpr, max(41, use_stack0.num_vgpr) ; GCN: .set indirect_use_stack.num_agpr, max(0, use_stack0.num_agpr) ; GCN: .set indirect_use_stack.numbered_sgpr, max(34, use_stack0.numbered_sgpr) -; GCN: .set indirect_use_stack.private_seg_size, 80+(max(use_stack0.private_seg_size)) +; GCN: .set indirect_use_stack.private_seg_size, 80+max(use_stack0.private_seg_size) ; GCN: .set indirect_use_stack.uses_vcc, or(1, use_stack0.uses_vcc) ; GCN: .set indirect_use_stack.uses_flat_scratch, or(0, use_stack0.uses_flat_scratch) ; GCN: .set indirect_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack) @@ -317,7 +317,7 @@ define void @indirect_use_stack() #1 { ; GCN: .set indirect_2_level_use_stack.num_vgpr, max(32, indirect_use_stack.num_vgpr) ; GCN: .set indirect_2_level_use_stack.num_agpr, max(0, indirect_use_stack.num_agpr) ; GCN: .set indirect_2_level_use_stack.numbered_sgpr, max(33, indirect_use_stack.numbered_sgpr) -; GCN: .set indirect_2_level_use_stack.private_seg_size, 0+(max(indirect_use_stack.private_seg_size)) +; GCN: .set indirect_2_level_use_stack.private_seg_size, 0+max(indirect_use_stack.private_seg_size) ; GCN: .set indirect_2_level_use_stack.uses_vcc, or(1, indirect_use_stack.uses_vcc) ; GCN: .set indirect_2_level_use_stack.uses_flat_scratch, or(1, indirect_use_stack.uses_flat_scratch) ; GCN: .set indirect_2_level_use_stack.has_dyn_sized_stack, or(0, indirect_use_stack.has_dyn_sized_stack) @@ -337,7 +337,7 @@ define amdgpu_kernel void @indirect_2_level_use_stack() #0 { ; GCN: .set 
multi_call_use_use_stack.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr) ; GCN: .set multi_call_use_use_stack.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr) ; GCN: .set multi_call_use_use_stack.numbered_sgpr, max(52, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr) -; GCN: .set multi_call_use_use_stack.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size)) +; GCN: .set multi_call_use_use_stack.private_seg_size, 0+max(use_stack0.private_seg_size, use_stack1.private_seg_size) ; GCN: .set multi_call_use_use_stack.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc) ; GCN: .set multi_call_use_use_stack.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch) ; GCN: .set multi_call_use_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack) @@ -358,7 +358,7 @@ declare void @external() #0 ; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr) ; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr) ; GCN: .set multi_call_with_external.numbered_sgpr, max(52, amdgpu.max_num_sgpr) -; GCN: .set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size)) +; GCN: .set multi_call_with_external.private_seg_size, 0+max(use_stack0.private_seg_size, use_stack1.private_seg_size) ; GCN: .set multi_call_with_external.uses_vcc, 1 ; GCN: .set multi_call_with_external.uses_flat_scratch, 1 ; GCN: .set multi_call_with_external.has_dyn_sized_stack, 1 @@ -378,7 +378,7 @@ define amdgpu_kernel void @multi_call_with_external() #0 { ; GCN: .set multi_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr) ; GCN: .set multi_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr) ; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(54, amdgpu.max_num_sgpr) -; GCN: .set multi_call_with_external_and_duplicates.private_seg_size, 
0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size)) +; GCN: .set multi_call_with_external_and_duplicates.private_seg_size, 0+max(use_stack0.private_seg_size, use_stack1.private_seg_size) ; GCN: .set multi_call_with_external_and_duplicates.uses_vcc, 1 ; GCN: .set multi_call_with_external_and_duplicates.uses_flat_scratch, 1 ; GCN: .set multi_call_with_external_and_duplicates.has_dyn_sized_stack, 1 @@ -467,7 +467,7 @@ ret: ; GCN: .set usage_direct_recursion.num_vgpr, max(32, direct_recursion_use_stack.num_vgpr) ; GCN: .set usage_direct_recursion.num_agpr, max(0, direct_recursion_use_stack.num_agpr) ; GCN: .set usage_direct_recursion.numbered_sgpr, max(33, direct_recursion_use_stack.numbered_sgpr) -; GCN: .set usage_direct_recursion.private_seg_size, 0+(max(direct_recursion_use_stack.private_seg_size)) +; GCN: .set usage_direct_recursion.private_seg_size, 0+max(direct_recursion_use_stack.private_seg_size) ; GCN: .set usage_direct_recursion.uses_vcc, or(1, direct_recursion_use_stack.uses_vcc) ; GCN: .set usage_direct_recursion.uses_flat_scratch, or(1, direct_recursion_use_stack.uses_flat_scratch) ; GCN: .set usage_direct_recursion.has_dyn_sized_stack, or(0, direct_recursion_use_stack.has_dyn_sized_stack) @@ -485,15 +485,15 @@ define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 { ; GCN: .set multi_stage_recurse2.num_vgpr, max(43, multi_stage_recurse1.num_vgpr) ; GCN: .set multi_stage_recurse2.num_agpr, max(0, multi_stage_recurse1.num_agpr) ; GCN: .set multi_stage_recurse2.numbered_sgpr, max(34, multi_stage_recurse1.numbered_sgpr) -; GCN: .set multi_stage_recurse2.private_seg_size, 16+(max(multi_stage_recurse1.private_seg_size)) +; GCN: .set multi_stage_recurse2.private_seg_size, 16+max(multi_stage_recurse1.private_seg_size) ; GCN: .set multi_stage_recurse2.uses_vcc, or(1, multi_stage_recurse1.uses_vcc) ; GCN: .set multi_stage_recurse2.uses_flat_scratch, or(0, multi_stage_recurse1.uses_flat_scratch) ; GCN: .set 
multi_stage_recurse2.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack) ; GCN: .set multi_stage_recurse2.has_recursion, or(1, multi_stage_recurse1.has_recursion) ; GCN: .set multi_stage_recurse2.has_indirect_call, or(0, multi_stage_recurse1.has_indirect_call) -; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+(extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1)) +; GCN: TotalNumSgprs: multi_stage_recurse2.numbered_sgpr+extrasgprs(multi_stage_recurse2.uses_vcc, multi_stage_recurse2.uses_flat_scratch, 1) ; GCN: NumVgprs: max(43, multi_stage_recurse1.num_vgpr) -; GCN: ScratchSize: 16+(max(multi_stage_recurse1.private_seg_size)) +; GCN: ScratchSize: 16+max(multi_stage_recurse1.private_seg_size) ; GCN-LABEL: {{^}}multi_stage_recurse1: ; GCN: .set multi_stage_recurse1.num_vgpr, max(48, amdgpu.max_num_vgpr) ; GCN: .set multi_stage_recurse1.num_agpr, max(0, amdgpu.max_num_agpr) @@ -522,7 +522,7 @@ define void @multi_stage_recurse2(i32 %val) #2 { ; GCN: .set usage_multi_stage_recurse.num_vgpr, max(32, multi_stage_recurse1.num_vgpr) ; GCN: .set usage_multi_stage_recurse.num_agpr, max(0, multi_stage_recurse1.num_agpr) ; GCN: .set usage_multi_stage_recurse.numbered_sgpr, max(33, multi_stage_recurse1.numbered_sgpr) -; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+(max(multi_stage_recurse1.private_seg_size)) +; GCN: .set usage_multi_stage_recurse.private_seg_size, 0+max(multi_stage_recurse1.private_seg_size) ; GCN: .set usage_multi_stage_recurse.uses_vcc, or(1, multi_stage_recurse1.uses_vcc) ; GCN: .set usage_multi_stage_recurse.uses_flat_scratch, or(1, multi_stage_recurse1.uses_flat_scratch) ; GCN: .set usage_multi_stage_recurse.has_dyn_sized_stack, or(0, multi_stage_recurse1.has_dyn_sized_stack) @@ -540,15 +540,15 @@ define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 { ; GCN: .set multi_stage_recurse_noattr2.num_vgpr, max(41, multi_stage_recurse_noattr1.num_vgpr) ; GCN: .set 
multi_stage_recurse_noattr2.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr) ; GCN: .set multi_stage_recurse_noattr2.numbered_sgpr, max(54, multi_stage_recurse_noattr1.numbered_sgpr) -; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+(max(multi_stage_recurse_noattr1.private_seg_size)) +; GCN: .set multi_stage_recurse_noattr2.private_seg_size, 16+max(multi_stage_recurse_noattr1.private_seg_size) ; GCN: .set multi_stage_recurse_noattr2.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc) ; GCN: .set multi_stage_recurse_noattr2.uses_flat_scratch, or(0, multi_stage_recurse_noattr1.uses_flat_scratch) ; GCN: .set multi_stage_recurse_noattr2.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack) ; GCN: .set multi_stage_recurse_noattr2.has_recursion, or(0, multi_stage_recurse_noattr1.has_recursion) ; GCN: .set multi_stage_recurse_noattr2.has_indirect_call, or(0, multi_stage_recurse_noattr1.has_indirect_call) -; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+(extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1)) +; GCN: TotalNumSgprs: multi_stage_recurse_noattr2.numbered_sgpr+extrasgprs(multi_stage_recurse_noattr2.uses_vcc, multi_stage_recurse_noattr2.uses_flat_scratch, 1) ; GCN: NumVgprs: max(41, multi_stage_recurse_noattr1.num_vgpr) -; GCN: ScratchSize: 16+(max(multi_stage_recurse_noattr1.private_seg_size)) +; GCN: ScratchSize: 16+max(multi_stage_recurse_noattr1.private_seg_size) ; GCN-LABEL: {{^}}multi_stage_recurse_noattr1: ; GCN: .set multi_stage_recurse_noattr1.num_vgpr, max(41, amdgpu.max_num_vgpr) ; GCN: .set multi_stage_recurse_noattr1.num_agpr, max(0, amdgpu.max_num_agpr) @@ -577,7 +577,7 @@ define void @multi_stage_recurse_noattr2(i32 %val) #0 { ; GCN: .set usage_multi_stage_recurse_noattrs.num_vgpr, max(32, multi_stage_recurse_noattr1.num_vgpr) ; GCN: .set usage_multi_stage_recurse_noattrs.num_agpr, max(0, multi_stage_recurse_noattr1.num_agpr) ; GCN: .set 
usage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, multi_stage_recurse_noattr1.numbered_sgpr) -; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+(max(multi_stage_recurse_noattr1.private_seg_size)) +; GCN: .set usage_multi_stage_recurse_noattrs.private_seg_size, 0+max(multi_stage_recurse_noattr1.private_seg_size) ; GCN: .set usage_multi_stage_recurse_noattrs.uses_vcc, or(1, multi_stage_recurse_noattr1.uses_vcc) ; GCN: .set usage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, multi_stage_recurse_noattr1.uses_flat_scratch) ; GCN: .set usage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, multi_stage_recurse_noattr1.has_dyn_sized_stack) @@ -595,7 +595,7 @@ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 { ; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr) ; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr) ; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(53, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr) -; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size)) +; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size) ; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc) ; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch) ; GCN: .set multi_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack, 
multi_stage_recurse1.has_dyn_sized_stack) diff --git a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll index 64d7f93760fd5..a6ce512164b89 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll @@ -66,39 +66,38 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8) ; CHECK-NEXT: s_mov_b32 s6, s4 ; CHECK-NEXT: s_mov_b32 s5, s3 ; CHECK-NEXT: s_mov_b32 s4, s2 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, 20, v1 -; CHECK-NEXT: v_add_i32_e32 v3, vcc, 16, v1 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 12, v1 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, 8, v1 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 4, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, 12, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 8, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 4, v1 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, 20, v1 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 16, v1 ; CHECK-NEXT: v_mov_b32_e32 v9, s0 -; CHECK-NEXT: v_add_i32_e32 v10, vcc, 20, v2 -; CHECK-NEXT: v_add_i32_e32 v11, vcc, 16, v2 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, 12, v2 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, 8, v2 ; CHECK-NEXT: s_mov_b32 m0, -1 -; CHECK-NEXT: ds_read_b32 v7, v3 -; CHECK-NEXT: ds_read_b32 v6, v4 -; CHECK-NEXT: ds_read_b32 v5, v5 -; CHECK-NEXT: ds_read_b32 v4, v8 -; CHECK-NEXT: ds_read_b32 v8, v0 +; CHECK-NEXT: ds_read_b32 v5, v3 +; CHECK-NEXT: ds_read_b32 v4, v4 +; CHECK-NEXT: ds_read_b32 v8, v6 +; CHECK-NEXT: ds_read_b32 v7, v7 +; CHECK-NEXT: ds_read_b32 v6, v0 ; CHECK-NEXT: ds_read_b32 v3, v1 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, 12, v2 -; CHECK-NEXT: v_add_i32_e32 v12, vcc, 8, v2 -; CHECK-NEXT: v_add_i32_e32 v13, vcc, 4, v2 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, 4, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 20, v2 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, 16, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: tbuffer_store_format_xyzw v[3:6], v9, s[4:7], s1 
format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:264 glc slc ; CHECK-NEXT: tbuffer_store_format_xy v[7:8], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:280 glc slc -; CHECK-NEXT: ds_read_b32 v0, v11 ; CHECK-NEXT: s_waitcnt expcnt(1) -; CHECK-NEXT: ds_read_b32 v5, v1 -; CHECK-NEXT: ds_read_b32 v4, v12 -; CHECK-NEXT: ds_read_b32 v3, v13 +; CHECK-NEXT: ds_read_b32 v4, v11 +; CHECK-NEXT: ds_read_b32 v3, v0 +; CHECK-NEXT: ds_read_b32 v1, v1 +; CHECK-NEXT: ds_read_b32 v0, v12 +; CHECK-NEXT: ds_read_b32 v5, v10 ; CHECK-NEXT: ds_read_b32 v2, v2 -; CHECK-NEXT: ds_read_b32 v1, v10 -; CHECK-NEXT: s_waitcnt lgkmcnt(5) +; CHECK-NEXT: s_waitcnt lgkmcnt(2) ; CHECK-NEXT: exp mrt0 off, off, off, off -; CHECK-NEXT: s_waitcnt lgkmcnt(1) -; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc ; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:240 glc slc ; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc ; CHECK-NEXT: s_endpgm %load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll index 52f380b7f80a3..60bbf4646ee03 100644 --- a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll +++ b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll @@ -111,7 +111,7 @@ define void @HU_Start() { ; CHECK: .set P_SpawnPlayer.num_vgpr, max(43, G_PlayerReborn.num_vgpr, P_SetThingPosition.num_vgpr, P_SetupPsprites.num_vgpr, HU_Start.num_vgpr) ; CHECK: .set P_SpawnPlayer.num_agpr, max(0, G_PlayerReborn.num_agpr, P_SetThingPosition.num_agpr, 
P_SetupPsprites.num_agpr, HU_Start.num_agpr) ; CHECK: .set P_SpawnPlayer.numbered_sgpr, max(84, G_PlayerReborn.numbered_sgpr, P_SetThingPosition.numbered_sgpr, P_SetupPsprites.numbered_sgpr, HU_Start.numbered_sgpr) -; CHECK: .set P_SpawnPlayer.private_seg_size, 16+(max(G_PlayerReborn.private_seg_size, P_SetThingPosition.private_seg_size, P_SetupPsprites.private_seg_size, HU_Start.private_seg_size)) +; CHECK: .set P_SpawnPlayer.private_seg_size, 16+max(G_PlayerReborn.private_seg_size, P_SetThingPosition.private_seg_size, P_SetupPsprites.private_seg_size, HU_Start.private_seg_size) ; CHECK: .set P_SpawnPlayer.uses_vcc, or(1, G_PlayerReborn.uses_vcc, P_SetThingPosition.uses_vcc, P_SetupPsprites.uses_vcc, HU_Start.uses_vcc) ; CHECK: .set P_SpawnPlayer.uses_flat_scratch, or(0, G_PlayerReborn.uses_flat_scratch, P_SetThingPosition.uses_flat_scratch, P_SetupPsprites.uses_flat_scratch, HU_Start.uses_flat_scratch) ; CHECK: .set P_SpawnPlayer.has_dyn_sized_stack, or(0, G_PlayerReborn.has_dyn_sized_stack, P_SetThingPosition.has_dyn_sized_stack, P_SetupPsprites.has_dyn_sized_stack, HU_Start.has_dyn_sized_stack) @@ -145,7 +145,7 @@ define void @I_Error(...) 
{ ; CHECK: .set G_DoReborn.num_vgpr, max(44, P_RemoveMobj.num_vgpr, P_SpawnMobj.num_vgpr, P_SpawnPlayer.num_vgpr, I_Error.num_vgpr) ; CHECK: .set G_DoReborn.num_agpr, max(0, P_RemoveMobj.num_agpr, P_SpawnMobj.num_agpr, P_SpawnPlayer.num_agpr, I_Error.num_agpr) ; CHECK: .set G_DoReborn.numbered_sgpr, max(104, P_RemoveMobj.numbered_sgpr, P_SpawnMobj.numbered_sgpr, P_SpawnPlayer.numbered_sgpr, I_Error.numbered_sgpr) -; CHECK: .set G_DoReborn.private_seg_size, 32+(max(P_RemoveMobj.private_seg_size, P_SpawnMobj.private_seg_size, P_SpawnPlayer.private_seg_size, I_Error.private_seg_size)) +; CHECK: .set G_DoReborn.private_seg_size, 32+max(P_RemoveMobj.private_seg_size, P_SpawnMobj.private_seg_size, P_SpawnPlayer.private_seg_size, I_Error.private_seg_size) ; CHECK: .set G_DoReborn.uses_vcc, or(1, P_RemoveMobj.uses_vcc, P_SpawnMobj.uses_vcc, P_SpawnPlayer.uses_vcc, I_Error.uses_vcc) ; CHECK: .set G_DoReborn.uses_flat_scratch, or(0, P_RemoveMobj.uses_flat_scratch, P_SpawnMobj.uses_flat_scratch, P_SpawnPlayer.uses_flat_scratch, I_Error.uses_flat_scratch) ; CHECK: .set G_DoReborn.has_dyn_sized_stack, or(0, P_RemoveMobj.has_dyn_sized_stack, P_SpawnMobj.has_dyn_sized_stack, P_SpawnPlayer.has_dyn_sized_stack, I_Error.has_dyn_sized_stack) @@ -219,7 +219,7 @@ define void @F_Ticker() { ; CHECK: .set G_CheckDemoStatus.num_vgpr, max(43, I_Quit.num_vgpr, D_AdvanceDemo.num_vgpr, I_Error.num_vgpr) ; CHECK: .set G_CheckDemoStatus.num_agpr, max(0, I_Quit.num_agpr, D_AdvanceDemo.num_agpr, I_Error.num_agpr) ; CHECK: .set G_CheckDemoStatus.numbered_sgpr, max(84, I_Quit.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, I_Error.numbered_sgpr) -; CHECK: .set G_CheckDemoStatus.private_seg_size, 32+(max(I_Quit.private_seg_size, D_AdvanceDemo.private_seg_size, I_Error.private_seg_size)) +; CHECK: .set G_CheckDemoStatus.private_seg_size, 32+max(I_Quit.private_seg_size, D_AdvanceDemo.private_seg_size, I_Error.private_seg_size) ; CHECK: .set G_CheckDemoStatus.uses_vcc, or(1, I_Quit.uses_vcc, 
D_AdvanceDemo.uses_vcc, I_Error.uses_vcc) ; CHECK: .set G_CheckDemoStatus.uses_flat_scratch, or(0, I_Quit.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, I_Error.uses_flat_scratch) ; CHECK: .set G_CheckDemoStatus.has_dyn_sized_stack, or(0, I_Quit.has_dyn_sized_stack, D_AdvanceDemo.has_dyn_sized_stack, I_Error.has_dyn_sized_stack) @@ -265,7 +265,7 @@ define ptr @P_SaveGameFile() { ; CHECK: .set R_FlatNumForName.num_vgpr, max(42, I_Error.num_vgpr) ; CHECK: .set R_FlatNumForName.num_agpr, max(0, I_Error.num_agpr) ; CHECK: .set R_FlatNumForName.numbered_sgpr, max(56, I_Error.numbered_sgpr) -; CHECK: .set R_FlatNumForName.private_seg_size, 16+(max(I_Error.private_seg_size)) +; CHECK: .set R_FlatNumForName.private_seg_size, 16+max(I_Error.private_seg_size) ; CHECK: .set R_FlatNumForName.uses_vcc, or(1, I_Error.uses_vcc) ; CHECK: .set R_FlatNumForName.uses_flat_scratch, or(0, I_Error.uses_flat_scratch) ; CHECK: .set R_FlatNumForName.has_dyn_sized_stack, or(0, I_Error.has_dyn_sized_stack) @@ -280,7 +280,7 @@ define i32 @R_FlatNumForName() { ; CHECK: .set R_TextureNumForName.num_vgpr, max(42, R_FlatNumForName.num_vgpr) ; CHECK: .set R_TextureNumForName.num_agpr, max(0, R_FlatNumForName.num_agpr) ; CHECK: .set R_TextureNumForName.numbered_sgpr, max(56, R_FlatNumForName.numbered_sgpr) -; CHECK: .set R_TextureNumForName.private_seg_size, 16+(max(R_FlatNumForName.private_seg_size)) +; CHECK: .set R_TextureNumForName.private_seg_size, 16+max(R_FlatNumForName.private_seg_size) ; CHECK: .set R_TextureNumForName.uses_vcc, or(1, R_FlatNumForName.uses_vcc) ; CHECK: .set R_TextureNumForName.uses_flat_scratch, or(0, R_FlatNumForName.uses_flat_scratch) ; CHECK: .set R_TextureNumForName.has_dyn_sized_stack, or(0, R_FlatNumForName.has_dyn_sized_stack) @@ -295,7 +295,7 @@ define i32 @R_TextureNumForName() { ; CHECK: .set G_Ticker.num_vgpr, max(47, G_DoReborn.num_vgpr, F_Ticker.num_vgpr, AM_Stop.num_vgpr, F_StartFinale.num_vgpr, D_AdvanceDemo.num_vgpr, R_FlatNumForName.num_vgpr, 
R_TextureNumForName.num_vgpr, P_TempSaveGameFile.num_vgpr, P_SaveGameFile.num_vgpr, I_Error.num_vgpr) ; CHECK: .set G_Ticker.num_agpr, max(0, G_DoReborn.num_agpr, F_Ticker.num_agpr, AM_Stop.num_agpr, F_StartFinale.num_agpr, D_AdvanceDemo.num_agpr, R_FlatNumForName.num_agpr, R_TextureNumForName.num_agpr, P_TempSaveGameFile.num_agpr, P_SaveGameFile.num_agpr, I_Error.num_agpr) ; CHECK: .set G_Ticker.numbered_sgpr, max(105, G_DoReborn.numbered_sgpr, F_Ticker.numbered_sgpr, AM_Stop.numbered_sgpr, F_StartFinale.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, R_FlatNumForName.numbered_sgpr, R_TextureNumForName.numbered_sgpr, P_TempSaveGameFile.numbered_sgpr, P_SaveGameFile.numbered_sgpr, I_Error.numbered_sgpr) -; CHECK: .set G_Ticker.private_seg_size, 48+(max(G_DoReborn.private_seg_size, F_Ticker.private_seg_size, AM_Stop.private_seg_size, F_StartFinale.private_seg_size, D_AdvanceDemo.private_seg_size, R_FlatNumForName.private_seg_size, R_TextureNumForName.private_seg_size, P_TempSaveGameFile.private_seg_size, P_SaveGameFile.private_seg_size, I_Error.private_seg_size)) +; CHECK: .set G_Ticker.private_seg_size, 48+max(G_DoReborn.private_seg_size, F_Ticker.private_seg_size, AM_Stop.private_seg_size, F_StartFinale.private_seg_size, D_AdvanceDemo.private_seg_size, R_FlatNumForName.private_seg_size, R_TextureNumForName.private_seg_size, P_TempSaveGameFile.private_seg_size, P_SaveGameFile.private_seg_size, I_Error.private_seg_size) ; CHECK: .set G_Ticker.uses_vcc, or(1, G_DoReborn.uses_vcc, F_Ticker.uses_vcc, AM_Stop.uses_vcc, F_StartFinale.uses_vcc, D_AdvanceDemo.uses_vcc, R_FlatNumForName.uses_vcc, R_TextureNumForName.uses_vcc, P_TempSaveGameFile.uses_vcc, P_SaveGameFile.uses_vcc, I_Error.uses_vcc) ; CHECK: .set G_Ticker.uses_flat_scratch, or(0, G_DoReborn.uses_flat_scratch, F_Ticker.uses_flat_scratch, AM_Stop.uses_flat_scratch, F_StartFinale.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, R_FlatNumForName.uses_flat_scratch, R_TextureNumForName.uses_flat_scratch, 
P_TempSaveGameFile.uses_flat_scratch, P_SaveGameFile.uses_flat_scratch, I_Error.uses_flat_scratch) ; CHECK: .set G_Ticker.has_dyn_sized_stack, or(0, G_DoReborn.has_dyn_sized_stack, F_Ticker.has_dyn_sized_stack, AM_Stop.has_dyn_sized_stack, F_StartFinale.has_dyn_sized_stack, D_AdvanceDemo.has_dyn_sized_stack, R_FlatNumForName.has_dyn_sized_stack, R_TextureNumForName.has_dyn_sized_stack, P_TempSaveGameFile.has_dyn_sized_stack, P_SaveGameFile.has_dyn_sized_stack, I_Error.has_dyn_sized_stack) @@ -319,7 +319,7 @@ define void @G_Ticker() { ; CHECK: .set RunTic.num_vgpr, max(47, G_CheckDemoStatus.num_vgpr, D_AdvanceDemo.num_vgpr, G_Ticker.num_vgpr) ; CHECK: .set RunTic.num_agpr, max(0, G_CheckDemoStatus.num_agpr, D_AdvanceDemo.num_agpr, G_Ticker.num_agpr) ; CHECK: .set RunTic.numbered_sgpr, max(105, G_CheckDemoStatus.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, G_Ticker.numbered_sgpr) -; CHECK: .set RunTic.private_seg_size, 32+(max(G_CheckDemoStatus.private_seg_size, D_AdvanceDemo.private_seg_size, G_Ticker.private_seg_size)) +; CHECK: .set RunTic.private_seg_size, 32+max(G_CheckDemoStatus.private_seg_size, D_AdvanceDemo.private_seg_size, G_Ticker.private_seg_size) ; CHECK: .set RunTic.uses_vcc, or(1, G_CheckDemoStatus.uses_vcc, D_AdvanceDemo.uses_vcc, G_Ticker.uses_vcc) ; CHECK: .set RunTic.uses_flat_scratch, or(0, G_CheckDemoStatus.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, G_Ticker.uses_flat_scratch) ; CHECK: .set RunTic.has_dyn_sized_stack, or(0, G_CheckDemoStatus.has_dyn_sized_stack, D_AdvanceDemo.has_dyn_sized_stack, G_Ticker.has_dyn_sized_stack) diff --git a/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll index e150231e3d9e1..7a810d0067c17 100644 --- a/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-call-resource-usage-mcexpr.ll @@ -20,7 +20,7 @@ entry: ; CHECK: .set baz.num_vgpr, max(49, qux.num_vgpr) ; CHECK: .set baz.num_agpr, 
max(0, qux.num_agpr) ; CHECK: .set baz.numbered_sgpr, max(34, qux.numbered_sgpr) -; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size)) +; CHECK: .set baz.private_seg_size, 16+max(qux.private_seg_size) ; CHECK: .set baz.uses_vcc, or(0, qux.uses_vcc) ; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch) ; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack) @@ -37,7 +37,7 @@ entry: ; CHECK: .set bar.num_vgpr, max(65, baz.num_vgpr, qux.num_vgpr) ; CHECK: .set bar.num_agpr, max(0, baz.num_agpr, qux.num_agpr) ; CHECK: .set bar.numbered_sgpr, max(34, baz.numbered_sgpr, qux.numbered_sgpr) -; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size, qux.private_seg_size)) +; CHECK: .set bar.private_seg_size, 16+max(baz.private_seg_size, qux.private_seg_size) ; CHECK: .set bar.uses_vcc, or(0, baz.uses_vcc, qux.uses_vcc) ; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch, qux.uses_flat_scratch) ; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack, qux.has_dyn_sized_stack) @@ -56,7 +56,7 @@ entry: ; CHECK: .set foo.num_vgpr, max(38, bar.num_vgpr) ; CHECK: .set foo.num_agpr, max(0, bar.num_agpr) ; CHECK: .set foo.numbered_sgpr, max(34, bar.numbered_sgpr) -; CHECK: .set foo.private_seg_size, 16+(max(bar.private_seg_size)) +; CHECK: .set foo.private_seg_size, 16+max(bar.private_seg_size) ; CHECK: .set foo.uses_vcc, or(0, bar.uses_vcc) ; CHECK: .set foo.uses_flat_scratch, or(0, bar.uses_flat_scratch) ; CHECK: .set foo.has_dyn_sized_stack, or(0, bar.has_dyn_sized_stack) @@ -73,7 +73,7 @@ entry: ; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr) ; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr) ; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr) -; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size)) +; CHECK: .set usefoo.private_seg_size, 0+max(foo.private_seg_size) ; CHECK: .set usefoo.uses_vcc, or(0, foo.uses_vcc) ; CHECK: .set usefoo.uses_flat_scratch, or(1, 
foo.uses_flat_scratch) ; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack) diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll index 0ff0ca1e54f6f..ff92db746b062 100644 --- a/llvm/test/CodeGen/AMDGPU/recursion.ll +++ b/llvm/test/CodeGen/AMDGPU/recursion.ll @@ -3,7 +3,7 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s ; CHECK-LABEL: {{^}}recursive: -; CHECK: .set recursive.private_seg_size, 16+(max(16384)) +; CHECK: .set recursive.private_seg_size, 16+max(16384) ; CHECK: ScratchSize: 16 ; V5-LABEL: {{^}}recursive: @@ -22,7 +22,7 @@ define void @tail_recursive() { ret void } -; CHECK: .set calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size)) +; CHECK: .set calls_tail_recursive.private_seg_size, 0+max(tail_recursive.private_seg_size) define void @calls_tail_recursive() norecurse { tail call void @tail_recursive() ret void @@ -41,10 +41,10 @@ define void @tail_recursive_with_stack() { ; For an arbitrary recursive call, report a large number for unknown stack ; usage for code object v4 and older ; CHECK-LABEL: {{^}}calls_recursive: -; CHECK: .set calls_recursive.private_seg_size, 0+(max(16384, recursive.private_seg_size)) +; CHECK: .set calls_recursive.private_seg_size, 0+max(16384, recursive.private_seg_size) ; ; V5-LABEL: {{^}}calls_recursive: -; V5: .set calls_recursive.private_seg_size, 0+(max(recursive.private_seg_size)) +; V5: .set calls_recursive.private_seg_size, 0+max(recursive.private_seg_size) ; V5: .set calls_recursive.has_dyn_sized_stack, or(0, recursive.has_dyn_sized_stack) define amdgpu_kernel void @calls_recursive() { call void @recursive() @@ -54,7 +54,7 @@ define amdgpu_kernel void @calls_recursive() { ; Make sure we do not report a huge stack size for tail recursive ; functions ; CHECK-LABEL: {{^}}kernel_indirectly_calls_tail_recursive: -; CHECK: .set 
kernel_indirectly_calls_tail_recursive.private_seg_size, 0+(max(calls_tail_recursive.private_seg_size)) +; CHECK: .set kernel_indirectly_calls_tail_recursive.private_seg_size, 0+max(calls_tail_recursive.private_seg_size) define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() { call void @calls_tail_recursive() ret void @@ -65,10 +65,10 @@ define amdgpu_kernel void @kernel_indirectly_calls_tail_recursive() { ; in the kernel. ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive: -; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(16384, tail_recursive.private_seg_size)) +; CHECK: .set kernel_calls_tail_recursive.private_seg_size, 0+max(16384, tail_recursive.private_seg_size) ; ; V5-LABEL: {{^}}kernel_calls_tail_recursive: -; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+(max(tail_recursive.private_seg_size)) +; V5: .set kernel_calls_tail_recursive.private_seg_size, 0+max(tail_recursive.private_seg_size) ; V5: .set kernel_calls_tail_recursive.has_recursion, or(1, tail_recursive.has_recursion) define amdgpu_kernel void @kernel_calls_tail_recursive() { call void @tail_recursive() @@ -76,10 +76,10 @@ define amdgpu_kernel void @kernel_calls_tail_recursive() { } ; CHECK-LABEL: {{^}}kernel_calls_tail_recursive_with_stack: -; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(16384, tail_recursive_with_stack.private_seg_size)) +; CHECK: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+max(16384, tail_recursive_with_stack.private_seg_size) ; ; V5-LABEL: {{^}}kernel_calls_tail_recursive_with_stack: -; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+(max(tail_recursive_with_stack.private_seg_size)) +; V5: .set kernel_calls_tail_recursive_with_stack.private_seg_size, 0+max(tail_recursive_with_stack.private_seg_size) ; V5: .set kernel_calls_tail_recursive_with_stack.has_dyn_sized_stack, or(0, tail_recursive_with_stack.has_dyn_sized_stack) define amdgpu_kernel void 
@kernel_calls_tail_recursive_with_stack() { call void @tail_recursive_with_stack() diff --git a/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll index ac6bd9a4ae8a6..3093349bff37c 100644 --- a/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/recursive-resource-usage-mcexpr.ll @@ -15,7 +15,7 @@ ; CHECK: .set baz.num_vgpr, max(61, qux.num_vgpr) ; CHECK: .set baz.num_agpr, max(0, qux.num_agpr) ; CHECK: .set baz.numbered_sgpr, max(51, qux.numbered_sgpr) -; CHECK: .set baz.private_seg_size, 16+(max(qux.private_seg_size)) +; CHECK: .set baz.private_seg_size, 16+max(qux.private_seg_size) ; CHECK: .set baz.uses_vcc, or(1, qux.uses_vcc) ; CHECK: .set baz.uses_flat_scratch, or(0, qux.uses_flat_scratch) ; CHECK: .set baz.has_dyn_sized_stack, or(0, qux.has_dyn_sized_stack) @@ -26,7 +26,7 @@ ; CHECK: .set bar.num_vgpr, max(51, baz.num_vgpr) ; CHECK: .set bar.num_agpr, max(0, baz.num_agpr) ; CHECK: .set bar.numbered_sgpr, max(61, baz.numbered_sgpr) -; CHECK: .set bar.private_seg_size, 16+(max(baz.private_seg_size)) +; CHECK: .set bar.private_seg_size, 16+max(baz.private_seg_size) ; CHECK: .set bar.uses_vcc, or(1, baz.uses_vcc) ; CHECK: .set bar.uses_flat_scratch, or(0, baz.uses_flat_scratch) ; CHECK: .set bar.has_dyn_sized_stack, or(0, baz.has_dyn_sized_stack) @@ -80,7 +80,7 @@ entry: ; CHECK: .set usefoo.num_vgpr, max(32, foo.num_vgpr) ; CHECK: .set usefoo.num_agpr, max(0, foo.num_agpr) ; CHECK: .set usefoo.numbered_sgpr, max(33, foo.numbered_sgpr) -; CHECK: .set usefoo.private_seg_size, 0+(max(foo.private_seg_size)) +; CHECK: .set usefoo.private_seg_size, 0+max(foo.private_seg_size) ; CHECK: .set usefoo.uses_vcc, or(1, foo.uses_vcc) ; CHECK: .set usefoo.uses_flat_scratch, or(1, foo.uses_flat_scratch) ; CHECK: .set usefoo.has_dyn_sized_stack, or(0, foo.has_dyn_sized_stack) diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll 
b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll index ef91be9366b02..afb77ed190896 100644 --- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll +++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -146,7 +146,7 @@ define void @empty_func() !dbg !8 { ; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: test_indirect_call.num_agpr ; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 ; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True -; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(test_indirect_call.numbered_sgpr+(extrasgprs(test_indirect_call.uses_vcc, test_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_call.num_agpr, test_indirect_call.num_vgpr), 1, 0)) +; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(test_indirect_call.numbered_sgpr+extrasgprs(test_indirect_call.uses_vcc, test_indirect_call.uses_flat_scratch, 1), 1, 0), max(totalnumvgprs(test_indirect_call.num_agpr, test_indirect_call.num_vgpr), 1, 0)) ; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 @@ -164,7 +164,7 @@ define amdgpu_kernel void @test_indirect_call() !dbg !9 { ; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: test_indirect_w_static_stack.num_agpr ; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 ; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True -; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(test_indirect_w_static_stack.numbered_sgpr+(extrasgprs(test_indirect_w_static_stack.uses_vcc, test_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_w_static_stack.num_agpr, test_indirect_w_static_stack.num_vgpr), 1, 0)) +; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, 
max(test_indirect_w_static_stack.numbered_sgpr+extrasgprs(test_indirect_w_static_stack.uses_vcc, test_indirect_w_static_stack.uses_flat_scratch, 1), 1, 0), max(totalnumvgprs(test_indirect_w_static_stack.num_agpr, test_indirect_w_static_stack.num_vgpr), 1, 0)) ; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 ; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 diff --git a/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll b/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll index c9fbd369e062d..cf5b95a729974 100644 --- a/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll +++ b/llvm/test/CodeGen/AMDGPU/unnamed-function-resource-info.ll @@ -19,7 +19,7 @@ entry: ; CHECK: .set __unnamed_2.num_vgpr, max(32, __unnamed_1.num_vgpr) ; CHECK: .set __unnamed_2.num_agpr, max(0, __unnamed_1.num_agpr) ; CHECK: .set __unnamed_2.numbered_sgpr, max(34, __unnamed_1.numbered_sgpr) -; CHECK: .set __unnamed_2.private_seg_size, 16+(max(__unnamed_1.private_seg_size)) +; CHECK: .set __unnamed_2.private_seg_size, 16+max(__unnamed_1.private_seg_size) ; CHECK: .set __unnamed_2.uses_vcc, or(0, __unnamed_1.uses_vcc) ; CHECK: .set __unnamed_2.uses_flat_scratch, or(0, __unnamed_1.uses_flat_scratch) ; CHECK: .set __unnamed_2.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack) @@ -35,7 +35,7 @@ entry: ; CHECK: .set use.num_vgpr, max(32, __unnamed_1.num_vgpr, __unnamed_2.num_vgpr) ; CHECK: .set use.num_agpr, max(0, __unnamed_1.num_agpr, __unnamed_2.num_agpr) ; CHECK: .set use.numbered_sgpr, max(33, __unnamed_1.numbered_sgpr, __unnamed_2.numbered_sgpr) -; CHECK: .set use.private_seg_size, 0+(max(__unnamed_1.private_seg_size, __unnamed_2.private_seg_size)) +; CHECK: .set use.private_seg_size, 0+max(__unnamed_1.private_seg_size, __unnamed_2.private_seg_size) ; CHECK: .set use.uses_vcc, or(0, __unnamed_1.uses_vcc, __unnamed_2.uses_vcc) ; CHECK: .set use.uses_flat_scratch, or(1, __unnamed_1.uses_flat_scratch, 
__unnamed_2.uses_flat_scratch) ; CHECK: .set use.has_dyn_sized_stack, or(0, __unnamed_1.has_dyn_sized_stack, __unnamed_2.has_dyn_sized_stack) diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir new file mode 100644 index 0000000000000..8f63f6c8cb1c6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp-true16.mir @@ -0,0 +1,124 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN + +--- + +name: vopc +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: vopc + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: V_CMPX_GT_U32_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit $mode, implicit $exec + ; GCN-NEXT: V_CMP_CLASS_F32_e32_dpp 2, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec + ; GCN-NEXT: V_CMP_NGE_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F32_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F32_e64_dpp 0, [[COPY1]], 0, [[COPY]], 0, 1, 15, 15, 1, implicit $mode, implicit $exec + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sgpr_32 = S_AND_B32 [[V_CMP_NGE_F32_e64_dpp]], 10101, implicit-def $scc + ; GCN-NEXT: V_CMP_GT_I32_e32_dpp [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $exec + %0:vgpr_32 = 
COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + %2:vgpr_32 = COPY $vgpr2 + %3:vgpr_32 = IMPLICIT_DEF + + %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + V_CMP_LT_F32_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec + + %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + V_CMPX_GT_U32_nosdst_e64 %10, %0, implicit-def $exec, implicit $mode, implicit $exec + + %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + %12:sgpr_32 = V_CMP_CLASS_F32_e64 2, %11, %0, implicit $mode, implicit $exec + + %13:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + %14:sgpr_32 = V_CMP_NGE_F32_e64 0, %13, 0, %0, 0, implicit $mode, implicit $exec + + %17:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + %18:sgpr_32 = V_CMP_NGE_F32_e64 0, %17, 0, %0, 0, implicit $mode, implicit $exec + %19:sgpr_32 = S_AND_B32 %18, 10101, implicit-def $scc + + %20:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec + V_CMP_LT_I32_e32 %0, %20, implicit-def $vcc, implicit $exec + +... 
+--- + +# V_MOV_B16_t16_e64_dpp is unsupported to combine +name: vopc_16 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_lo16, $vgpr1_hi16, $vgpr255_hi16 + + ; GCN-LABEL: name: vopc_16 + ; GCN: liveins: $vgpr0_lo16, $vgpr1_hi16, $vgpr255_hi16 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_16 = COPY $vgpr0_lo16 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY $vgpr1_hi16 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_16 = COPY $vgpr255_hi16 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF + ; GCN-NEXT: [[V_MOV_B16_t16_e64_dpp:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64_dpp [[DEF]], 0, [[COPY1]], 0, 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 0, [[V_MOV_B16_t16_e64_dpp]], 0, [[COPY]], 0, implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B16_t16_e64_dpp1:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64_dpp [[DEF]], 0, [[COPY1]], 0, 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B16_t16_e64_dpp1]], 0, [[COPY]], 0, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B16_t16_e64_dpp2:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64_dpp [[DEF]], 0, [[COPY1]], 0, 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64 1, [[V_MOV_B16_t16_e64_dpp2]], 0, [[COPY]], 1, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B16_t16_e64_dpp3:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64_dpp [[DEF]], 0, [[COPY1]], 0, 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_NGE_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, [[V_CMP_NGE_F16_t16_e64_]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + %0:vgpr_16 = COPY $vgpr0_lo16 + %1:vgpr_16 = COPY $vgpr1_hi16 + %2:vgpr_16 = COPY $vgpr255_hi16 + %3:vgpr_16 = IMPLICIT_DEF + + %5:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %1, 0, 1, 15, 15, 1, implicit $exec + V_CMPX_EQ_I16_t16_nosdst_e64 0, %5, 0, %0, 0, implicit-def $exec, 
implicit-def $vcc, implicit $mode, implicit $exec + + %6:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %1, 0, 1, 15, 15, 1, implicit $exec + %7:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %6, 0, %0, 0, implicit-def $vcc, implicit $mode, implicit $exec + + %8:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %1, 0, 1, 15, 15, 1, implicit $exec + %9:sgpr_32 = V_CMP_GE_F16_t16_e64 1, %8, 0, %0, 1, 0, implicit $mode, implicit $exec + + %15:vgpr_16 = V_MOV_B16_t16_e64_dpp %3, 0, %1, 0, 1, 15, 15, 1, implicit $exec + %16:sgpr_32 = V_CMP_NGE_F16_t16_e64 0, %16, 0, %0, 0, 0, implicit $mode, implicit $exec + +... +--- + +name: mask_not_full +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: mask_not_full + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GCN-NEXT: [[V_MOV_B16_t16_e64_dpp:%[0-9]+]]:vgpr_16 = V_MOV_B16_t16_e64_dpp [[DEF]].lo16, 0, [[COPY1]].hi16, 0, 1, 15, 14, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B16_t16_e64_dpp]], 0, [[COPY]].lo16, 0, implicit-def $vcc_lo, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec + ; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp]], 0, [[COPY]], 1, implicit $mode, implicit $exec + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + + %4:vgpr_16 = V_MOV_B16_t16_e64_dpp %2.lo16, 0, %1.hi16, 0, 1, 15, 14, 1, implicit $exec + %99:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, %4, 0, %0.lo16, 0, implicit-def $vcc, implicit $mode, implicit $exec + + %5:vgpr_32 = V_MOV_B32_dpp %3, 
%1, 1, 13, 15, 1, implicit $exec + %6:sgpr_32 = V_CMP_GE_F32_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec + +... diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-isel-globals-pic.ll b/llvm/test/CodeGen/ARM/GlobalISel/arm-isel-globals-pic.ll index 80d687ba0f53d..c8adcaac19b7f 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-isel-globals-pic.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-isel-globals-pic.ll @@ -41,7 +41,7 @@ define i32 @test_external_global() { ; CHECK-NEXT: bx lr ; ELF: [[LABEL]]: ; ELF: [[TMPLABEL:.L[[:alnum:]_]+]]: -; ELF: .long external_global(GOT_PREL)-(([[ANCHOR]]+8)-[[TMPLABEL]]) +; ELF: .long external_global(GOT_PREL)-([[ANCHOR]]+8-[[TMPLABEL]]) ; DARWIN-NOMOVT: [[LABEL]]: ; DARWIN-NOMOVT: .long L_external_global$non_lazy_ptr-([[ANCHOR]]+8) ; DARWIN-NOMOVT-NOT: .long L_external_global @@ -88,7 +88,7 @@ define i32 @test_external_constant() { ; CHECK-NEXT: bx lr ; ELF: [[LABEL]]: ; ELF: [[TMPLABEL:.L[[:alnum:]_]+]]: -; ELF: .long external_constant(GOT_PREL)-(([[ANCHOR]]+8)-[[TMPLABEL]]) +; ELF: .long external_constant(GOT_PREL)-([[ANCHOR]]+8-[[TMPLABEL]]) ; DARWIN-NOMOVT: [[LABEL]]: ; DARWIN-NOMOVT: .long L_external_constant$non_lazy_ptr-([[ANCHOR]]+8) ; DARWIN-NOMOVT-NOT: .long L_external_constant diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-isel-globals-pic.ll b/llvm/test/CodeGen/ARM/GlobalISel/thumb-isel-globals-pic.ll index e6828a52f2941..a4cc43f4dd814 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-isel-globals-pic.ll +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-isel-globals-pic.ll @@ -41,7 +41,7 @@ define i32 @test_external_global() { ; CHECK-NEXT: bx lr ; ELF: [[LABEL]]: ; ELF: [[TMPLABEL:.L[[:alnum:]_]+]]: -; ELF: .long external_global(GOT_PREL)-(([[ANCHOR]]+4)-[[TMPLABEL]]) +; ELF: .long external_global(GOT_PREL)-([[ANCHOR]]+4-[[TMPLABEL]]) ; DARWIN-NOMOVT: [[LABEL]]: ; DARWIN-NOMOVT: .long L_external_global$non_lazy_ptr-([[ANCHOR]]+4) ; DARWIN-NOMOVT-NOT: .long L_external_global @@ -88,7 +88,7 @@ define i32 
@test_external_constant() { ; CHECK-NEXT: bx lr ; ELF: [[LABEL]]: ; ELF: [[TMPLABEL:.L[[:alnum:]_]+]]: -; ELF: .long external_constant(GOT_PREL)-(([[ANCHOR]]+4)-[[TMPLABEL]]) +; ELF: .long external_constant(GOT_PREL)-([[ANCHOR]]+4-[[TMPLABEL]]) ; DARWIN-NOMOVT: [[LABEL]]: ; DARWIN-NOMOVT: .long L_external_constant$non_lazy_ptr-([[ANCHOR]]+4) ; DARWIN-NOMOVT-NOT: .long L_external_constant diff --git a/llvm/test/CodeGen/ARM/elf-preemption.ll b/llvm/test/CodeGen/ARM/elf-preemption.ll index efb1c6aa26817..154c29c1c029c 100644 --- a/llvm/test/CodeGen/ARM/elf-preemption.ll +++ b/llvm/test/CodeGen/ARM/elf-preemption.ll @@ -22,7 +22,7 @@ define ptr @get_preemptable_var() nounwind { ; PIC-NEXT: @ %bb.1: ; PIC-NEXT: .LCPI0_0: ; PIC-NEXT: .Ltmp0: -; PIC-NEXT: .long preemptable_var(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) +; PIC-NEXT: .long preemptable_var(GOT_PREL)-(.LPC0_0+8-.Ltmp0) ret ptr @preemptable_var } @@ -127,7 +127,7 @@ define dso_preemptable ptr @preemptable_func() nounwind { ; PIC-NEXT: @ %bb.1: ; PIC-NEXT: .LCPI5_0: ; PIC-NEXT: .Ltmp1: -; PIC-NEXT: .long preemptable_func(GOT_PREL)-((.LPC5_0+8)-.Ltmp1) +; PIC-NEXT: .long preemptable_func(GOT_PREL)-(.LPC5_0+8-.Ltmp1) ret ptr @preemptable_func } diff --git a/llvm/test/CodeGen/ARM/globals.ll b/llvm/test/CodeGen/ARM/globals.ll index 3a36d16d53501..acd4655720b00 100644 --- a/llvm/test/CodeGen/ARM/globals.ll +++ b/llvm/test/CodeGen/ARM/globals.ll @@ -69,4 +69,4 @@ define i32 @test1() { ; LinuxPIC: .p2align 2 ; LinuxPIC: .LCPI0_0: ; LinuxPIC: .Ltmp0: -; LinuxPIC: .long G(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) +; LinuxPIC: .long G(GOT_PREL)-(.LPC0_0+8-.Ltmp0) diff --git a/llvm/test/CodeGen/ARM/litpool-licm.ll b/llvm/test/CodeGen/ARM/litpool-licm.ll index f1a029b83f831..bcc15e93f4947 100644 --- a/llvm/test/CodeGen/ARM/litpool-licm.ll +++ b/llvm/test/CodeGen/ARM/litpool-licm.ll @@ -15,11 +15,11 @@ define void @func(i32 %n) { ; CHECK: [[CP1]]: ; CHECK-NEXT: [[CP1_TMP:.Ltmp[0-9]+]]: -; CHECK-NEXT: .long var(TLSGD)-(([[PCPOS1]]+4)-[[CP1_TMP]]) 
+; CHECK-NEXT: .long var(TLSGD)-([[PCPOS1]]+4-[[CP1_TMP]]) ; CHECK: [[CP2]]: ; CHECK-NEXT: [[CP2_TMP:.Ltmp[0-9]+]]: -; CHECK-NEXT: .long var(TLSGD)-(([[PCPOS2]]+4)-[[CP2_TMP]]) +; CHECK-NEXT: .long var(TLSGD)-([[PCPOS2]]+4-[[CP2_TMP]]) entry: br label %loop diff --git a/llvm/test/CodeGen/ARM/load-global.ll b/llvm/test/CodeGen/ARM/load-global.ll index 0d370a495d2f5..01f5b5793949e 100644 --- a/llvm/test/CodeGen/ARM/load-global.ll +++ b/llvm/test/CodeGen/ARM/load-global.ll @@ -43,7 +43,7 @@ define i32 @test1() { ; LINUX: ldr r0, .LCPI0_0 ; LINUX: ldr r0, [pc, r0] ; LINUX: ldr r0, [r0] -; LINUX: .long G(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) +; LINUX: .long G(GOT_PREL)-(.LPC0_0+8-.Ltmp0) ; LINUX_T: ldr r0, .LCPI0_0 ; LINUX_T: add r0, pc diff --git a/llvm/test/CodeGen/ARM/load-global2.ll b/llvm/test/CodeGen/ARM/load-global2.ll index 08a8f4280d3b8..0ea2476388c42 100644 --- a/llvm/test/CodeGen/ARM/load-global2.ll +++ b/llvm/test/CodeGen/ARM/load-global2.ll @@ -28,7 +28,7 @@ define signext i8 @foo() { ; LINUX-PIC-NEXT: @ %bb.3: ; LINUX-PIC-NEXT: .LCPI0_0: ; LINUX-PIC-NEXT: .Ltmp0: -; LINUX-PIC-NEXT: .long x(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) +; LINUX-PIC-NEXT: .long x(GOT_PREL)-(.LPC0_0+8-.Ltmp0) entry: %0 = load i8, ptr @x %tobool = icmp eq i8 %0, 0 diff --git a/llvm/test/CodeGen/ARM/plt-relative-reloc.ll b/llvm/test/CodeGen/ARM/plt-relative-reloc.ll index 414a48e5aaaed..ede891900e6d0 100644 --- a/llvm/test/CodeGen/ARM/plt-relative-reloc.ll +++ b/llvm/test/CodeGen/ARM/plt-relative-reloc.ll @@ -11,6 +11,6 @@ declare void @fn2() unnamed_addr declare void @fn3() ; CHECK: .long 0 -; CHECK-NEXT: .long (fn1(prel31)-vtable)-4 -; CHECK-NEXT: .long (fn2(prel31)-vtable)-4 -; CHECK-NEXT: .long (fn3-vtable)-4 +; CHECK-NEXT: .long fn1(prel31)-vtable-4 +; CHECK-NEXT: .long fn2(prel31)-vtable-4 +; CHECK-NEXT: .long fn3-vtable-4 diff --git a/llvm/test/CodeGen/ARM/stack-guard-elf.ll b/llvm/test/CodeGen/ARM/stack-guard-elf.ll index d0e5db7e5711b..eb40b33a5eeb2 100644 --- 
a/llvm/test/CodeGen/ARM/stack-guard-elf.ll +++ b/llvm/test/CodeGen/ARM/stack-guard-elf.ll @@ -43,10 +43,10 @@ define i32 @test1() #0 { ; CHECK-NEXT: @ %bb.2: ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) +; CHECK-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_0+8-.Ltmp0) ; CHECK-NEXT: .LCPI0_1: ; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_1+8)-.Ltmp1) +; CHECK-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_1+8-.Ltmp1) ; ; THUMB1-LABEL: test1: ; THUMB1: @ %bb.0: @@ -88,10 +88,10 @@ define i32 @test1() #0 { ; THUMB1-NEXT: @ %bb.3: ; THUMB1-NEXT: .LCPI0_0: ; THUMB1-NEXT: .Ltmp0: -; THUMB1-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+4)-.Ltmp0) +; THUMB1-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_0+4-.Ltmp0) ; THUMB1-NEXT: .LCPI0_1: ; THUMB1-NEXT: .Ltmp1: -; THUMB1-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_1+4)-.Ltmp1) +; THUMB1-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_1+4-.Ltmp1) ; ; THUMB1-PIC-LABEL: test1: ; THUMB1-PIC: @ %bb.0: @@ -133,10 +133,10 @@ define i32 @test1() #0 { ; THUMB1-PIC-NEXT: @ %bb.3: ; THUMB1-PIC-NEXT: .LCPI0_0: ; THUMB1-PIC-NEXT: .Ltmp0: -; THUMB1-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+4)-.Ltmp0) +; THUMB1-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_0+4-.Ltmp0) ; THUMB1-PIC-NEXT: .LCPI0_1: ; THUMB1-PIC-NEXT: .Ltmp1: -; THUMB1-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_1+4)-.Ltmp1) +; THUMB1-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_1+4-.Ltmp1) ; ; THUMB2-LABEL: test1: ; THUMB2: @ %bb.0: @@ -169,10 +169,10 @@ define i32 @test1() #0 { ; THUMB2-NEXT: @ %bb.2: ; THUMB2-NEXT: .LCPI0_0: ; THUMB2-NEXT: .Ltmp0: -; THUMB2-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+4)-.Ltmp0) +; THUMB2-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_0+4-.Ltmp0) ; THUMB2-NEXT: .LCPI0_1: ; THUMB2-NEXT: .Ltmp1: -; THUMB2-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_1+4)-.Ltmp1) +; THUMB2-NEXT: .long 
__stack_chk_guard(GOT_PREL)-(.LPC0_1+4-.Ltmp1) ; ; THUMB2-PIC-LABEL: test1: ; THUMB2-PIC: @ %bb.0: @@ -205,10 +205,10 @@ define i32 @test1() #0 { ; THUMB2-PIC-NEXT: @ %bb.2: ; THUMB2-PIC-NEXT: .LCPI0_0: ; THUMB2-PIC-NEXT: .Ltmp0: -; THUMB2-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+4)-.Ltmp0) +; THUMB2-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_0+4-.Ltmp0) ; THUMB2-PIC-NEXT: .LCPI0_1: ; THUMB2-PIC-NEXT: .Ltmp1: -; THUMB2-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_1+4)-.Ltmp1) +; THUMB2-PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_1+4-.Ltmp1) %a1 = alloca [256 x i32], align 4 call void @foo(ptr %a1) #3 ret i32 0 diff --git a/llvm/test/CodeGen/ARM/stack-guard-rwpi.ll b/llvm/test/CodeGen/ARM/stack-guard-rwpi.ll index f4ae9ca0b1d97..14a0c244497d4 100644 --- a/llvm/test/CodeGen/ARM/stack-guard-rwpi.ll +++ b/llvm/test/CodeGen/ARM/stack-guard-rwpi.ll @@ -16,7 +16,7 @@ ; PIC: ldr {{r[0-9]+}}, .LCPI0_0 ; PIC: .LCPI0_0: ; PIC-NEXT: .Ltmp0: -; PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-((.LPC0_0+8)-.Ltmp0) +; PIC-NEXT: .long __stack_chk_guard(GOT_PREL)-(.LPC0_0+8-.Ltmp0) define dso_local i32 @foo(i32 %t) nounwind sspstrong { entry: diff --git a/llvm/test/CodeGen/Mips/ehframe-indirect.ll b/llvm/test/CodeGen/Mips/ehframe-indirect.ll index 1cd2b86a8e158..901095cc6e7f6 100644 --- a/llvm/test/CodeGen/Mips/ehframe-indirect.ll +++ b/llvm/test/CodeGen/Mips/ehframe-indirect.ll @@ -51,7 +51,7 @@ declare void @foo() ; O32: [[PC_LABEL:\$tmp[0-9]+]]: ; N32: [[PC_LABEL:\.Ltmp[0-9]+]]: ; N64: [[PC_LABEL:\.Ltmp[0-9]+]]: -; O32: .4byte ($_ZTISt9exception.DW.stub)-([[PC_LABEL]]) +; O32: .4byte $_ZTISt9exception.DW.stub-[[PC_LABEL]] ; N32: .4byte .L_ZTISt9exception.DW.stub-[[PC_LABEL]] ; N64: .4byte .L_ZTISt9exception.DW.stub-[[PC_LABEL]] ; O32: $_ZTISt9exception.DW.stub: diff --git a/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll b/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll index e8771feefad33..df15658b54f52 100644 --- 
a/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll +++ b/llvm/test/CodeGen/Mips/indirect-jump-hazard/long-branch.ll @@ -34,9 +34,9 @@ define void @test1(i32 signext %s) { ; O32-PIC-NEXT: # %bb.1: # %entry ; O32-PIC-NEXT: addiu $sp, $sp, -8 ; O32-PIC-NEXT: sw $ra, 0($sp) -; O32-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) +; O32-PIC-NEXT: lui $1, %hi($BB0_4-$BB0_2) ; O32-PIC-NEXT: bal $BB0_2 -; O32-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; O32-PIC-NEXT: addiu $1, $1, %lo($BB0_4-$BB0_2) ; O32-PIC-NEXT: $BB0_2: # %entry ; O32-PIC-NEXT: addu $1, $ra, $1 ; O32-PIC-NEXT: lw $ra, 0($sp) @@ -59,8 +59,8 @@ define void @test1(i32 signext %s) { ; O32-R6-PIC-NEXT: # %bb.1: # %entry ; O32-R6-PIC-NEXT: addiu $sp, $sp, -8 ; O32-R6-PIC-NEXT: sw $ra, 0($sp) -; O32-R6-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) -; O32-R6-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; O32-R6-PIC-NEXT: lui $1, %hi($BB0_4-$BB0_2) +; O32-R6-PIC-NEXT: addiu $1, $1, %lo($BB0_4-$BB0_2) ; O32-R6-PIC-NEXT: balc $BB0_2 ; O32-R6-PIC-NEXT: $BB0_2: # %entry ; O32-R6-PIC-NEXT: addu $1, $ra, $1 diff --git a/llvm/test/CodeGen/Mips/jtstat.ll b/llvm/test/CodeGen/Mips/jtstat.ll index 21d7aba6aaa61..233ff110f137c 100644 --- a/llvm/test/CodeGen/Mips/jtstat.ll +++ b/llvm/test/CodeGen/Mips/jtstat.ll @@ -59,13 +59,13 @@ sw.epilog: ; preds = %entry, %sw.bb7, %sw ; CHECK-STATIC16: li ${{[0-9]+}}, %hi($JTI{{[0-9]+}}_{{[0-9]+}}) ; CHECK-STATIC16: lw ${{[0-9]+}}, %lo($JTI{{[0-9]+}}_{{[0-9]+}})(${{[0-9]+}}) ; CHECK-STATIC16: $JTI{{[0-9]+}}_{{[0-9]+}}: -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) -; CHECK-STATIC16: .4byte ($BB0_{{[0-9]+}}) +; CHECK-STATIC16: .4byte 
$BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} +; CHECK-STATIC16: .4byte $BB0_{{[0-9]+}} diff --git a/llvm/test/CodeGen/Mips/load-store-left-right.ll b/llvm/test/CodeGen/Mips/load-store-left-right.ll index 0b7e51cbf7dc6..3f318654d83b8 100644 --- a/llvm/test/CodeGen/Mips/load-store-left-right.ll +++ b/llvm/test/CodeGen/Mips/load-store-left-right.ll @@ -944,7 +944,7 @@ define void @pass_array_byval() nounwind { ; MIPS32-EL-NEXT: lbu $1, 6($1) ; MIPS32-EL-NEXT: sll $1, $1, 16 ; MIPS32-EL-NEXT: lw $25, %call16(extern_func)($gp) -; MIPS32-EL-NEXT: .reloc ($tmp0), R_MIPS_JALR, extern_func +; MIPS32-EL-NEXT: .reloc $tmp0, R_MIPS_JALR, extern_func ; MIPS32-EL-NEXT: $tmp0: ; MIPS32-EL-NEXT: jalr $25 ; MIPS32-EL-NEXT: or $5, $2, $1 @@ -970,7 +970,7 @@ define void @pass_array_byval() nounwind { ; MIPS32-EB-NEXT: lbu $1, 6($1) ; MIPS32-EB-NEXT: sll $1, $1, 8 ; MIPS32-EB-NEXT: lw $25, %call16(extern_func)($gp) -; MIPS32-EB-NEXT: .reloc ($tmp0), R_MIPS_JALR, extern_func +; MIPS32-EB-NEXT: .reloc $tmp0, R_MIPS_JALR, extern_func ; MIPS32-EB-NEXT: $tmp0: ; MIPS32-EB-NEXT: jalr $25 ; MIPS32-EB-NEXT: or $5, $2, $1 @@ -991,7 +991,7 @@ define void @pass_array_byval() nounwind { ; MIPS32R6-EL-NEXT: sll $3, $3, 16 ; MIPS32R6-EL-NEXT: lw $4, 0($1) ; MIPS32R6-EL-NEXT: lw $25, %call16(extern_func)($gp) -; MIPS32R6-EL-NEXT: .reloc ($tmp0), R_MIPS_JALR, extern_func +; MIPS32R6-EL-NEXT: .reloc $tmp0, R_MIPS_JALR, extern_func ; MIPS32R6-EL-NEXT: $tmp0: ; MIPS32R6-EL-NEXT: jalr $25 ; MIPS32R6-EL-NEXT: or $5, $2, $3 @@ -1013,7 +1013,7 @@ define void @pass_array_byval() nounwind { ; MIPS32R6-EB-NEXT: sll $3, $3, 16 ; MIPS32R6-EB-NEXT: lw $4, 0($1) ; MIPS32R6-EB-NEXT: lw $25, 
%call16(extern_func)($gp) -; MIPS32R6-EB-NEXT: .reloc ($tmp0), R_MIPS_JALR, extern_func +; MIPS32R6-EB-NEXT: .reloc $tmp0, R_MIPS_JALR, extern_func ; MIPS32R6-EB-NEXT: $tmp0: ; MIPS32R6-EB-NEXT: jalr $25 ; MIPS32R6-EB-NEXT: or $5, $3, $2 diff --git a/llvm/test/CodeGen/Mips/longbranch.ll b/llvm/test/CodeGen/Mips/longbranch.ll index d348f03295811..66ee3859ae448 100644 --- a/llvm/test/CodeGen/Mips/longbranch.ll +++ b/llvm/test/CodeGen/Mips/longbranch.ll @@ -58,9 +58,9 @@ define void @test1(i32 signext %s) { ; O32-PIC-NEXT: # %bb.1: # %entry ; O32-PIC-NEXT: addiu $sp, $sp, -8 ; O32-PIC-NEXT: sw $ra, 0($sp) -; O32-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) +; O32-PIC-NEXT: lui $1, %hi($BB0_4-$BB0_2) ; O32-PIC-NEXT: bal $BB0_2 -; O32-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; O32-PIC-NEXT: addiu $1, $1, %lo($BB0_4-$BB0_2) ; O32-PIC-NEXT: $BB0_2: # %entry ; O32-PIC-NEXT: addu $1, $ra, $1 ; O32-PIC-NEXT: lw $ra, 0($sp) @@ -98,8 +98,8 @@ define void @test1(i32 signext %s) { ; O32-R6-PIC-NEXT: # %bb.1: # %entry ; O32-R6-PIC-NEXT: addiu $sp, $sp, -8 ; O32-R6-PIC-NEXT: sw $ra, 0($sp) -; O32-R6-PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) -; O32-R6-PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; O32-R6-PIC-NEXT: lui $1, %hi($BB0_4-$BB0_2) +; O32-R6-PIC-NEXT: addiu $1, $1, %lo($BB0_4-$BB0_2) ; O32-R6-PIC-NEXT: balc $BB0_2 ; O32-R6-PIC-NEXT: $BB0_2: # %entry ; O32-R6-PIC-NEXT: addu $1, $ra, $1 @@ -212,9 +212,9 @@ define void @test1(i32 signext %s) { ; MICROMIPS-NEXT: # %bb.1: # %entry ; MICROMIPS-NEXT: addiu $sp, $sp, -8 ; MICROMIPS-NEXT: sw $ra, 0($sp) -; MICROMIPS-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) +; MICROMIPS-NEXT: lui $1, %hi($BB0_4-$BB0_2) ; MICROMIPS-NEXT: bal $BB0_2 -; MICROMIPS-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; MICROMIPS-NEXT: addiu $1, $1, %lo($BB0_4-$BB0_2) ; MICROMIPS-NEXT: $BB0_2: # %entry ; MICROMIPS-NEXT: addu $1, $ra, $1 ; MICROMIPS-NEXT: lw $ra, 0($sp) @@ -261,8 +261,8 @@ define void @test1(i32 signext %s) { ; MICROMIPSR6PIC-NEXT: # %bb.1: # 
%entry ; MICROMIPSR6PIC-NEXT: addiu $sp, $sp, -8 ; MICROMIPSR6PIC-NEXT: sw $ra, 0($sp) -; MICROMIPSR6PIC-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) -; MICROMIPSR6PIC-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; MICROMIPSR6PIC-NEXT: lui $1, %hi($BB0_4-$BB0_2) +; MICROMIPSR6PIC-NEXT: addiu $1, $1, %lo($BB0_4-$BB0_2) ; MICROMIPSR6PIC-NEXT: balc $BB0_2 ; MICROMIPSR6PIC-NEXT: $BB0_2: # %entry ; MICROMIPSR6PIC-NEXT: addu $1, $ra, $1 @@ -285,9 +285,9 @@ define void @test1(i32 signext %s) { ; NACL-NEXT: # %bb.1: ; NACL-NEXT: addiu $sp, $sp, -8 ; NACL-NEXT: sw $ra, 0($sp) -; NACL-NEXT: lui $1, %hi(($BB0_4)-($BB0_2)) +; NACL-NEXT: lui $1, %hi($BB0_4-$BB0_2) ; NACL-NEXT: bal $BB0_2 -; NACL-NEXT: addiu $1, $1, %lo(($BB0_4)-($BB0_2)) +; NACL-NEXT: addiu $1, $1, %lo($BB0_4-$BB0_2) ; NACL-NEXT: $BB0_2: ; NACL-NEXT: addu $1, $ra, $1 ; NACL-NEXT: lw $ra, 0($sp) diff --git a/llvm/test/CodeGen/Mips/mcount.ll b/llvm/test/CodeGen/Mips/mcount.ll index 41100e6cbeb6f..713666ddee649 100644 --- a/llvm/test/CodeGen/Mips/mcount.ll +++ b/llvm/test/CodeGen/Mips/mcount.ll @@ -40,7 +40,7 @@ define void @foo() { ; MIPS32-PIC-NEXT: addu $gp, $2, $25 ; MIPS32-PIC-NEXT: lw $25, %call16(_mcount)($gp) ; MIPS32-PIC-NEXT: move $1, $ra -; MIPS32-PIC-NEXT: .reloc ($tmp0), R_MIPS_JALR, _mcount +; MIPS32-PIC-NEXT: .reloc $tmp0, R_MIPS_JALR, _mcount ; MIPS32-PIC-NEXT: $tmp0: ; MIPS32-PIC-NEXT: jalr $25 ; MIPS32-PIC-NEXT: addiu $sp, $sp, -8 @@ -107,7 +107,7 @@ define void @foo() { ; MIPS32-MM-PIC-NEXT: addu $gp, $2, $25 ; MIPS32-MM-PIC-NEXT: lw $25, %call16(_mcount)($gp) ; MIPS32-MM-PIC-NEXT: move $1, $ra -; MIPS32-MM-PIC-NEXT: .reloc ($tmp0), R_MICROMIPS_JALR, _mcount +; MIPS32-MM-PIC-NEXT: .reloc $tmp0, R_MICROMIPS_JALR, _mcount ; MIPS32-MM-PIC-NEXT: $tmp0: ; MIPS32-MM-PIC-NEXT: jalr $25 ; MIPS32-MM-PIC-NEXT: addiu $sp, $sp, -8 diff --git a/llvm/test/CodeGen/Mips/micromips-mtc-mfc.ll b/llvm/test/CodeGen/Mips/micromips-mtc-mfc.ll index e23f0760d8d91..66b484b47550f 100644 --- 
a/llvm/test/CodeGen/Mips/micromips-mtc-mfc.ll +++ b/llvm/test/CodeGen/Mips/micromips-mtc-mfc.ll @@ -12,11 +12,11 @@ define double @foo(double %a, double %b) { ; MM2-NEXT: mthc1 $zero, $f2 # encoding: [0x54,0x02,0x38,0x3b] ; MM2-NEXT: c.ule.d $f12, $f2 # encoding: [0x54,0x4c,0x05,0xfc] ; MM2-NEXT: bc1t $BB0_2 # encoding: [0x43,0xa0,A,A] -; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_PC16_S1 +; MM2-NEXT: # fixup A - offset: 0, value: $BB0_2, kind: fixup_MICROMIPS_PC16_S1 ; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00] ; MM2-NEXT: # %bb.1: # %entry ; MM2-NEXT: j $BB0_2 # encoding: [0b110101AA,A,A,A] -; MM2-NEXT: # fixup A - offset: 0, value: ($BB0_2), kind: fixup_MICROMIPS_26_S1 +; MM2-NEXT: # fixup A - offset: 0, value: $BB0_2, kind: fixup_MICROMIPS_26_S1 ; MM2-NEXT: nop # encoding: [0x00,0x00,0x00,0x00] ; MM2-NEXT: $BB0_2: # %return ; MM2-NEXT: jrc $ra # encoding: [0x45,0xbf] diff --git a/llvm/test/CodeGen/Mips/mips16ex.ll b/llvm/test/CodeGen/Mips/mips16ex.ll index 7dbccc7b223bd..fb9a44e767516 100644 --- a/llvm/test/CodeGen/Mips/mips16ex.ll +++ b/llvm/test/CodeGen/Mips/mips16ex.ll @@ -2,7 +2,7 @@ ;16: main: ;16-NEXT: [[TMP:.*]]: -;16-NEXT: .set $func_begin0, ([[TMP]]) +;16-NEXT: .set $func_begin0, [[TMP]] ;16-NEXT: .cfi_startproc ;16-NEXT: .cfi_personality @.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1 diff --git a/llvm/test/CodeGen/Mips/reloc-jalr.ll b/llvm/test/CodeGen/Mips/reloc-jalr.ll index 88bbfa7fdfc36..f7cdfbb64c285 100644 --- a/llvm/test/CodeGen/Mips/reloc-jalr.ll +++ b/llvm/test/CodeGen/Mips/reloc-jalr.ll @@ -102,9 +102,9 @@ entry: ; ALL-LABEL: checkCall: ; ALL-NOT: MIPS_JALR call void @foo() -; JALR-32: .reloc ([[TMPLABEL:\$.+]]), R_MIPS_JALR, foo +; JALR-32: .reloc [[TMPLABEL:\$.+]], R_MIPS_JALR, foo ; JALR-64: .reloc [[TMPLABEL:\..+]], R_MIPS_JALR, foo -; JALR-MM: .reloc ([[TMPLABEL:\$.+]]), R_MICROMIPS_JALR, foo +; JALR-MM: .reloc [[TMPLABEL:\$.+]], R_MICROMIPS_JALR, foo ; NORELOC-NOT: .reloc ; 
JALR-ALL-NEXT: [[TMPLABEL]]: ; JALR-32R2-NEXT: jalr $25 @@ -121,9 +121,9 @@ entry: ; ALL-LABEL: checkTailCall: ; ALL-NOT: MIPS_JALR tail call void @foo() -; JALR-32: .reloc ([[TMPLABEL:\$.+]]), R_MIPS_JALR, foo +; JALR-32: .reloc [[TMPLABEL:\$.+]], R_MIPS_JALR, foo ; JALR-64: .reloc [[TMPLABEL:\..+]], R_MIPS_JALR, foo -; JALR-MM: .reloc ([[TMPLABEL:\$.+]]), R_MICROMIPS_JALR, foo +; JALR-MM: .reloc [[TMPLABEL:\$.+]], R_MICROMIPS_JALR, foo ; JALR-ALL-NEXT: [[TMPLABEL]]: ; NORELOC-NOT: .reloc ; TAILCALL-32R2-NEXT: jr $25 diff --git a/llvm/test/CodeGen/Mips/shrink-wrapping.ll b/llvm/test/CodeGen/Mips/shrink-wrapping.ll index b08d2f1b64678..8153338253465 100644 --- a/llvm/test/CodeGen/Mips/shrink-wrapping.ll +++ b/llvm/test/CodeGen/Mips/shrink-wrapping.ll @@ -243,9 +243,9 @@ define i32 @foo2(i32 signext %a) { ; SHRINK-WRAP-PIC-NEXT: # %bb.1: ; SHRINK-WRAP-PIC-NEXT: addiu $sp, $sp, -8 ; SHRINK-WRAP-PIC-NEXT: sw $ra, 0($sp) -; SHRINK-WRAP-PIC-NEXT: lui $1, %hi(($BB1_4)-($BB1_2)) +; SHRINK-WRAP-PIC-NEXT: lui $1, %hi($BB1_4-$BB1_2) ; SHRINK-WRAP-PIC-NEXT: bal $BB1_2 -; SHRINK-WRAP-PIC-NEXT: addiu $1, $1, %lo(($BB1_4)-($BB1_2)) +; SHRINK-WRAP-PIC-NEXT: addiu $1, $1, %lo($BB1_4-$BB1_2) ; SHRINK-WRAP-PIC-NEXT: $BB1_2: ; SHRINK-WRAP-PIC-NEXT: addu $1, $ra, $1 ; SHRINK-WRAP-PIC-NEXT: lw $ra, 0($sp) @@ -272,9 +272,9 @@ define i32 @foo2(i32 signext %a) { ; NO-SHRINK-WRAP-PIC-NEXT: # %bb.1: ; NO-SHRINK-WRAP-PIC-NEXT: addiu $sp, $sp, -8 ; NO-SHRINK-WRAP-PIC-NEXT: sw $ra, 0($sp) -; NO-SHRINK-WRAP-PIC-NEXT: lui $1, %hi(($BB1_4)-($BB1_2)) +; NO-SHRINK-WRAP-PIC-NEXT: lui $1, %hi($BB1_4-$BB1_2) ; NO-SHRINK-WRAP-PIC-NEXT: bal $BB1_2 -; NO-SHRINK-WRAP-PIC-NEXT: addiu $1, $1, %lo(($BB1_4)-($BB1_2)) +; NO-SHRINK-WRAP-PIC-NEXT: addiu $1, $1, %lo($BB1_4-$BB1_2) ; NO-SHRINK-WRAP-PIC-NEXT: $BB1_2: ; NO-SHRINK-WRAP-PIC-NEXT: addu $1, $ra, $1 ; NO-SHRINK-WRAP-PIC-NEXT: lw $ra, 0($sp) diff --git a/llvm/test/CodeGen/Mips/unalignedload.ll b/llvm/test/CodeGen/Mips/unalignedload.ll index 
912998ab9d038..5c78519e6481f 100644 --- a/llvm/test/CodeGen/Mips/unalignedload.ll +++ b/llvm/test/CodeGen/Mips/unalignedload.ll @@ -26,7 +26,7 @@ define void @bar1() nounwind { ; MIPS32-EL-NEXT: lbu $1, 3($1) ; MIPS32-EL-NEXT: sll $1, $1, 8 ; MIPS32-EL-NEXT: lw $25, %call16(foo2)($gp) -; MIPS32-EL-NEXT: .reloc ($tmp0), R_MIPS_JALR, foo2 +; MIPS32-EL-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2 ; MIPS32-EL-NEXT: $tmp0: ; MIPS32-EL-NEXT: jalr $25 ; MIPS32-EL-NEXT: or $4, $1, $2 @@ -47,7 +47,7 @@ define void @bar1() nounwind { ; MIPS32-EB-NEXT: lbu $1, 2($1) ; MIPS32-EB-NEXT: sll $1, $1, 24 ; MIPS32-EB-NEXT: lw $25, %call16(foo2)($gp) -; MIPS32-EB-NEXT: .reloc ($tmp0), R_MIPS_JALR, foo2 +; MIPS32-EB-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2 ; MIPS32-EB-NEXT: $tmp0: ; MIPS32-EB-NEXT: jalr $25 ; MIPS32-EB-NEXT: or $4, $1, $2 @@ -65,7 +65,7 @@ define void @bar1() nounwind { ; MIPS32R6-EL-NEXT: lw $1, %got(s2)($gp) ; MIPS32R6-EL-NEXT: lhu $4, 2($1) ; MIPS32R6-EL-NEXT: lw $25, %call16(foo2)($gp) -; MIPS32R6-EL-NEXT: .reloc ($tmp0), R_MIPS_JALR, foo2 +; MIPS32R6-EL-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2 ; MIPS32R6-EL-NEXT: $tmp0: ; MIPS32R6-EL-NEXT: jalrc $25 ; MIPS32R6-EL-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload @@ -82,7 +82,7 @@ define void @bar1() nounwind { ; MIPS32R6-EB-NEXT: lw $1, %got(s2)($gp) ; MIPS32R6-EB-NEXT: lhu $1, 2($1) ; MIPS32R6-EB-NEXT: lw $25, %call16(foo2)($gp) -; MIPS32R6-EB-NEXT: .reloc ($tmp0), R_MIPS_JALR, foo2 +; MIPS32R6-EB-NEXT: .reloc $tmp0, R_MIPS_JALR, foo2 ; MIPS32R6-EB-NEXT: $tmp0: ; MIPS32R6-EB-NEXT: jalr $25 ; MIPS32R6-EB-NEXT: sll $4, $1, 16 @@ -113,7 +113,7 @@ define void @bar2() nounwind { ; MIPS32-EL-NEXT: lbu $1, 6($1) ; MIPS32-EL-NEXT: sll $1, $1, 16 ; MIPS32-EL-NEXT: lw $25, %call16(foo4)($gp) -; MIPS32-EL-NEXT: .reloc ($tmp1), R_MIPS_JALR, foo4 +; MIPS32-EL-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4 ; MIPS32-EL-NEXT: $tmp1: ; MIPS32-EL-NEXT: jalr $25 ; MIPS32-EL-NEXT: or $5, $2, $1 @@ -139,7 +139,7 @@ define void @bar2() nounwind { ; 
MIPS32-EB-NEXT: lbu $1, 6($1) ; MIPS32-EB-NEXT: sll $1, $1, 8 ; MIPS32-EB-NEXT: lw $25, %call16(foo4)($gp) -; MIPS32-EB-NEXT: .reloc ($tmp1), R_MIPS_JALR, foo4 +; MIPS32-EB-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4 ; MIPS32-EB-NEXT: $tmp1: ; MIPS32-EB-NEXT: jalr $25 ; MIPS32-EB-NEXT: or $5, $2, $1 @@ -160,7 +160,7 @@ define void @bar2() nounwind { ; MIPS32R6-EL-NEXT: sll $3, $3, 16 ; MIPS32R6-EL-NEXT: lw $4, 0($1) ; MIPS32R6-EL-NEXT: lw $25, %call16(foo4)($gp) -; MIPS32R6-EL-NEXT: .reloc ($tmp1), R_MIPS_JALR, foo4 +; MIPS32R6-EL-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4 ; MIPS32R6-EL-NEXT: $tmp1: ; MIPS32R6-EL-NEXT: jalr $25 ; MIPS32R6-EL-NEXT: or $5, $2, $3 @@ -182,7 +182,7 @@ define void @bar2() nounwind { ; MIPS32R6-EB-NEXT: sll $3, $3, 16 ; MIPS32R6-EB-NEXT: lw $4, 0($1) ; MIPS32R6-EB-NEXT: lw $25, %call16(foo4)($gp) -; MIPS32R6-EB-NEXT: .reloc ($tmp1), R_MIPS_JALR, foo4 +; MIPS32R6-EB-NEXT: .reloc $tmp1, R_MIPS_JALR, foo4 ; MIPS32R6-EB-NEXT: $tmp1: ; MIPS32R6-EB-NEXT: jalr $25 ; MIPS32R6-EB-NEXT: or $5, $3, $2 diff --git a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll index f49ee02eb6b70..26cea577699f6 100644 --- a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll +++ b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll @@ -53,8 +53,8 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" ; CHECK-MIPS64-NEXT: .8byte .Lxray_sled_0-[[TMP]] ; CHECK-MIPS64-NEXT: .8byte .Lfunc_begin0-([[TMP]]+8) ; CHECK-MIPS32: [[TMP:\$tmp[0-9]+]]: -; CHECK-MIPS32-NEXT: .4byte ($xray_sled_0)-([[TMP]]) -; CHECK-MIPS32-NEXT: .4byte ($func_begin0)-(([[TMP]])+4) +; CHECK-MIPS32-NEXT: .4byte $xray_sled_0-[[TMP]] +; CHECK-MIPS32-NEXT: .4byte $func_begin0-([[TMP]]+4) ; We test multiple returns in a single function to make sure we're getting all ; of them with XRay instrumentation. 
@@ -135,8 +135,8 @@ NotEqual: ; CHECK-MIPS64: .8byte .Lxray_sled_3 ; CHECK-MIPS64: .8byte .Lxray_sled_4 ; CHECK-MIPS32: [[TMP:\$tmp[0-9]+]]: -; CHECK-MIPS32-NEXT: .4byte ($xray_sled_2)-([[TMP]]) +; CHECK-MIPS32-NEXT: .4byte $xray_sled_2-[[TMP]] ; CHECK-MIPS32: [[TMP:\$tmp[0-9]+]]: -; CHECK-MIPS32-NEXT: .4byte ($xray_sled_3)-([[TMP]]) +; CHECK-MIPS32-NEXT: .4byte $xray_sled_3-[[TMP]] ; CHECK-MIPS32: [[TMP:\$tmp[0-9]+]]: -; CHECK-MIPS32-NEXT: .4byte ($xray_sled_4)-([[TMP]]) +; CHECK-MIPS32-NEXT: .4byte $xray_sled_4-[[TMP]] diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll index 44d62124ac58d..742d50a0f4912 100644 --- a/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-dynamic-tls-largeaccess.ll @@ -45,12 +45,12 @@ define signext i32 @test1() { ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, ElementIntTLSv1[TL]@ld(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 2 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, ElementIntTLSv1[TL]@ld+24(r3) -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3) -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, ElementIntTLS4[TL]@ld+328-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, ElementIntTLS2[TL]@ld+320-65536(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 3 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, ElementIntTLS3[TL]@ld+324-65536(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 88 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r4, ElementIntTLS5[TL]@ld+332-65536(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 102 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: addi r1, r1, 48 ; 
SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r0, 16(r1) @@ -70,12 +70,12 @@ define signext i32 @test1() { ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, ElementIntTLSv1[TL]@ld(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 2 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLSv1[TL]@ld+24(r3) -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3) -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, (ElementIntTLS2[TL]@ld+320)-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLS4[TL]@ld+328-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, ElementIntTLS2[TL]@ld+320-65536(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 3 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, (ElementIntTLS3[TL]@ld+324)-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, ElementIntTLS3[TL]@ld+324-65536(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 88 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, (ElementIntTLS5[TL]@ld+332)-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r4, ElementIntTLS5[TL]@ld+332-65536(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 102 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addi r1, r1, 48 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r0, 16(r1) @@ -124,7 +124,7 @@ define i64 @test2() { ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: mr r6, r3 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 212 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 203 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, (ElementLongTLS2[TL]@ld+1200)-131072(r6) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, ElementLongTLS2[TL]@ld+1200-131072(r6) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, ElementLongTLS6[UL]@ld+424(r6) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r3, L..C2(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar @@ -133,10 +133,10 @@ define i64 @test2() { ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, 440(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 6 ; 
SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 100 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, (ElementLongTLS3[TL]@ld+2000)-196608(r6) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, ElementLongTLS3[TL]@ld+2000-196608(r6) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 882 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, (ElementLongTLS4[TL]@ld+6800)-196608(r6) -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, (ElementLongTLS5[TL]@ld+8400)-196608(r6) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r4, ElementLongTLS4[TL]@ld+6800-196608(r6) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: std r3, ElementLongTLS5[TL]@ld+8400-196608(r6) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 1191 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: addi r1, r1, 48 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r0, 16(r1) @@ -157,7 +157,7 @@ define i64 @test2() { ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r4, L..C1@l(r4) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, ElementLongTLS6[UL]@ld+424(r6) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 203 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, (ElementLongTLS2[TL]@ld+1200)-131072(r6) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, ElementLongTLS2[TL]@ld+1200-131072(r6) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addis r3, L..C2@u(r2) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r3, L..C2@l(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: bla .__tls_get_addr[PR] @@ -165,10 +165,10 @@ define i64 @test2() { ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 440(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 6 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 100 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, (ElementLongTLS3[TL]@ld+2000)-196608(r6) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, ElementLongTLS3[TL]@ld+2000-196608(r6) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 882 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, (ElementLongTLS4[TL]@ld+6800)-196608(r6) -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, (ElementLongTLS5[TL]@ld+8400)-196608(r6) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r4, 
ElementLongTLS4[TL]@ld+6800-196608(r6) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: std r3, ElementLongTLS5[TL]@ld+8400-196608(r6) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 1191 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: addi r1, r1, 48 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: ld r0, 16(r1) @@ -214,13 +214,13 @@ define signext i32 @test3() { ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: bla .__tls_get_mod[PR] ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 2 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r4, 1 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r6, (ElementIntTLS3[TL]@ld+324)-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r6, ElementIntTLS3[TL]@ld+324-65536(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: ld r6, L..C3(r2) # target-flags(ppc-tlsld) @ElementIntTLSv2 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, (ElementIntTLS2[TL]@ld+320)-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, ElementIntTLS2[TL]@ld+320-65536(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 88 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, (ElementIntTLS5[TL]@ld+332)-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, ElementIntTLS5[TL]@ld+332-65536(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r5, 4 -; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3) +; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, ElementIntTLS4[TL]@ld+328-65536(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stwux r4, r3, r6 ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: stw r5, 24(r3) ; SMALL-LOCAL-DYNAMIC-SMALLCM64-NEXT: li r3, 102 @@ -241,13 +241,13 @@ define signext i32 @test3() { ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: bla .__tls_get_mod[PR] ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 2 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r4, 1 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, (ElementIntTLS2[TL]@ld+320)-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLS2[TL]@ld+320-65536(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 3 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, 
(ElementIntTLS3[TL]@ld+324)-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLS3[TL]@ld+324-65536(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 88 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, (ElementIntTLS5[TL]@ld+332)-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLS5[TL]@ld+332-65536(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r5, 4 -; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, (ElementIntTLS4[TL]@ld+328)-65536(r3) +; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, ElementIntTLS4[TL]@ld+328-65536(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stwux r4, r3, r6 ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: stw r5, 24(r3) ; SMALL-LOCAL-DYNAMIC-LARGECM64-NEXT: li r3, 102 diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll index 91013af7a3188..a6d1fa1328290 100644 --- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess.ll @@ -30,12 +30,12 @@ define signext i32 @StoreArrays1() { ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLSv1[TL]@le(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 2 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le+24(r13) -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, (mySmallLocalExecTLS4[TL]@le+328)-65536(r13) -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, (mySmallLocalExecTLS2[TL]@le+320)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS4[TL]@le+328-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS2[TL]@le+320-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 3 -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, (mySmallLocalExecTLS3[TL]@le+324)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS3[TL]@le+324-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 88 -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 
(mySmallLocalExecTLS5[TL]@le+332)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS5[TL]@le+332-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr ; @@ -46,12 +46,12 @@ define signext i32 @StoreArrays1() { ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLSv1[TL]@le(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 2 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLSv1[TL]@le+24(r13) -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, (mySmallLocalExecTLS4[TL]@le+328)-65536(r13) -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, (mySmallLocalExecTLS2[TL]@le+320)-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLS4[TL]@le+328-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS2[TL]@le+320-65536(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 3 -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, (mySmallLocalExecTLS3[TL]@le+324)-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS3[TL]@le+324-65536(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 88 -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, (mySmallLocalExecTLS5[TL]@le+332)-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS5[TL]@le+332-65536(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: @@ -90,36 +90,36 @@ define signext i32 @StoreArrays2() { ; SMALL-LOCAL-EXEC-SMALLCM64: # %bb.0: # %entry ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 2 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 1 -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, (mySmallLocalExecTLS2[TL]@le+320)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS2[TL]@le+320-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 3 -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, (mySmallLocalExecTLS3[TL]@le+324)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS3[TL]@le+324-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r4, 
L..C0(r2) # target-flags(ppc-tprel) @mySmallLocalExecTLSv2 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: add r4, r13, r4 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 0(r4) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 4 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, 24(r4) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 88 -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, (mySmallLocalExecTLS4[TL]@le+328)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r3, mySmallLocalExecTLS4[TL]@le+328-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 102 -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, (mySmallLocalExecTLS5[TL]@le+332)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: stw r4, mySmallLocalExecTLS5[TL]@le+332-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: blr ; ; SMALL-LOCAL-EXEC-LARGECM64-LABEL: StoreArrays2: ; SMALL-LOCAL-EXEC-LARGECM64: # %bb.0: # %entry ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 2 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 3 -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, (mySmallLocalExecTLS2[TL]@le+320)-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, mySmallLocalExecTLS2[TL]@le+320-65536(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addis r3, L..C0@u(r2) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r3, L..C0@l(r3) -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, (mySmallLocalExecTLS3[TL]@le+324)-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLS3[TL]@le+324-65536(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 1 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: add r3, r13, r3 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 0(r3) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 4 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, 24(r3) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 88 -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, (mySmallLocalExecTLS4[TL]@le+328)-65536(r13) -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, (mySmallLocalExecTLS5[TL]@le+332)-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r4, mySmallLocalExecTLS4[TL]@le+328-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: stw r3, 
mySmallLocalExecTLS5[TL]@le+332-65536(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 102 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll index f7b99461be5f3..7a6db3273421f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll +++ b/llvm/test/CodeGen/PowerPC/aix-small-local-exec-tls-largeaccess2.ll @@ -42,8 +42,8 @@ define i64 @StoreLargeAccess1() { ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r4, 100 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS3[TL]@le+2000(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 882 -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, (mySmallLocalExecTLS4[TL]@le+6800)-65536(r13) -; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS4[TL]@le+6800-65536(r13) +; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS5[TL]@le+8400-65536(r13) ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: li r3, 1191 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: addi r1, r1, 48 ; SMALL-LOCAL-EXEC-SMALLCM64-NEXT: ld r0, 16(r1) @@ -70,8 +70,8 @@ define i64 @StoreLargeAccess1() { ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r4, 100 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS3[TL]@le+2000(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 882 -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, (mySmallLocalExecTLS4[TL]@le+6800)-65536(r13) -; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r4, mySmallLocalExecTLS4[TL]@le+6800-65536(r13) +; SMALL-LOCAL-EXEC-LARGECM64-NEXT: std r3, mySmallLocalExecTLS5[TL]@le+8400-65536(r13) ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: li r3, 1191 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: addi r1, r1, 48 ; SMALL-LOCAL-EXEC-LARGECM64-NEXT: ld r0, 16(r1) diff --git 
a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll index 91a2283897f33..c9a9f36bd1634 100644 --- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll +++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-funcattr.ll @@ -27,9 +27,9 @@ define i64 @StoreLargeAccess1() #1 { ; COMMONCM-NEXT: stdx r5, r3, r4 ; COMMONCM-NEXT: li r3, 55 ; COMMONCM-NEXT: li r4, 64 -; COMMONCM-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13) +; COMMONCM-NEXT: std r3, mySmallTLS2[TL]@le+696-65536(r13) ; COMMONCM-NEXT: li r3, 142 -; COMMONCM-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13) +; COMMONCM-NEXT: std r4, mySmallTLS3[TL]@le+20000-131072(r13) ; COMMONCM-NEXT: blr entry: %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS) diff --git a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll index 1e4a3b9bcc47c..3029c85bb5fa7 100644 --- a/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll +++ b/llvm/test/CodeGen/PowerPC/aix-small-tls-globalvarattr-targetattr.ll @@ -29,9 +29,9 @@ define i64 @StoreLargeAccess1() { ; COMMONCM-NEXT: stdx r5, r3, r4 ; COMMONCM-NEXT: li r3, 55 ; COMMONCM-NEXT: li r4, 64 -; COMMONCM-NEXT: std r3, (mySmallTLS2[TL]@le+696)-65536(r13) +; COMMONCM-NEXT: std r3, mySmallTLS2[TL]@le+696-65536(r13) ; COMMONCM-NEXT: li r3, 142 -; COMMONCM-NEXT: std r4, (mySmallTLS3[TL]@le+20000)-131072(r13) +; COMMONCM-NEXT: std r4, mySmallTLS3[TL]@le+20000-131072(r13) ; COMMONCM-NEXT: blr entry: %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallTLS) diff --git a/llvm/test/CodeGen/RISCV/dso_local_equivalent.ll b/llvm/test/CodeGen/RISCV/dso_local_equivalent.ll index 1ee8b1f78110b..e5e69b7bfe13b 100644 --- a/llvm/test/CodeGen/RISCV/dso_local_equivalent.ll +++ b/llvm/test/CodeGen/RISCV/dso_local_equivalent.ll @@ -26,9 +26,9 
@@ declare void @extern_func() ; CHECK-NEXT: .word 0 # 0x0 ; CHECK-NEXT: .word %pltpcrel(f0) ; CHECK-NEXT: .word %pltpcrel(f1+4) -; CHECK-NEXT: .word (f2-_ZTV1B)-8 +; CHECK-NEXT: .word f2-_ZTV1B-8 ; CHECK-NEXT: .word %pltpcrel(f3+12) -; CHECK-NEXT: .word (f4-_ZTV1B)-8 +; CHECK-NEXT: .word f4-_ZTV1B-8 ; CHECK-NEXT: .size _ZTV1B, 28 declare void @f0() declare void @f1() diff --git a/llvm/test/CodeGen/RISCV/plt-relative-reloc.ll b/llvm/test/CodeGen/RISCV/plt-relative-reloc.ll index a432fc5e7e530..d2dceb773b2e9 100644 --- a/llvm/test/CodeGen/RISCV/plt-relative-reloc.ll +++ b/llvm/test/CodeGen/RISCV/plt-relative-reloc.ll @@ -16,6 +16,6 @@ declare void @fn3() ; CHECK-NEXT: .word 0 # 0x0 ; CHECK-NEXT: .word %pltpcrel(fn1) ; CHECK-NEXT: .word %pltpcrel(fn2+4) -; CHECK-NEXT: .word (fn3-vtable)-4 -; CHECK-NEXT: .word (global4-vtable)-4 +; CHECK-NEXT: .word fn3-vtable-4 +; CHECK-NEXT: .word global4-vtable-4 ; CHECK-NEXT: .size vtable, 20 diff --git a/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll b/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll new file mode 100644 index 0000000000000..88ab1c0c3eaef --- /dev/null +++ b/llvm/test/CodeGen/RISCV/reassoc-shl-addi-add.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32-unknown-elf -mattr=+zba %s -o - | FileCheck %s + +declare i32 @callee1(i32 noundef) +declare i32 @callee2(i32 noundef, i32 noundef) +declare i32 @callee(i32 noundef, i32 noundef, i32 noundef, i32 noundef) + +define void @t1(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a2, a0, a2 +; CHECK-NEXT: sh2add a1, a0, a1 +; CHECK-NEXT: addi a1, a1, 45 +; CHECK-NEXT: addi a2, a2, 45 +; CHECK-NEXT: sh2add a3, a0, a3 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 45 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, 
%c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +define void @t2(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { +; CHECK-LABEL: t2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a5, a0, 42 +; CHECK-NEXT: add a4, a5, a1 +; CHECK-NEXT: add a3, a5, a2 +; CHECK-NEXT: mv a1, a5 +; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add4 = add nsw i32 %add, %b + %add7 = add nsw i32 %add, %c + %call = tail call i32 @callee(i32 noundef %shl, i32 noundef %add, i32 noundef %add4, i32 noundef %add7) + ret void +} + +define void @t3(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) #0 { +; CHECK-LABEL: t3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a5, a0, 42 +; CHECK-NEXT: add a0, a5, a1 +; CHECK-NEXT: add a1, a5, a2 +; CHECK-NEXT: add a2, a5, a3 +; CHECK-NEXT: add a3, a5, a4 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %add2 = add nsw i32 %add, %c + %add3 = add nsw i32 %add, %d + %add4 = add nsw i32 %add, %e + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add2, i32 noundef %add3, i32 noundef %add4) + ret void +} + +define void @t4(i32 noundef %a, i32 noundef %b) #0 { +; CHECK-LABEL: t4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a0, a0, a1 +; CHECK-NEXT: addi a0, a0, 42 +; CHECK-NEXT: tail callee1 +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %call = tail call i32 @callee1(i32 noundef %add1) + ret void +} + +define void @t5(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 { +; CHECK-LABEL: t5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a2, a0, a2 +; CHECK-NEXT: sh2add a0, a0, a1 +; CHECK-NEXT: addi a0, a0, 42 +; CHECK-NEXT: addi a1, a2, 42 +; 
CHECK-NEXT: tail callee2 +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %add2 = add nsw i32 %add, %c + %call = tail call i32 @callee2(i32 noundef %add1, i32 noundef %add2) + ret void +} + +define void @t6(i32 noundef %a, i32 noundef %b) #0 { +; CHECK-LABEL: t6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a2, a0, 2 +; CHECK-NEXT: sh2add a0, a0, a1 +; CHECK-NEXT: addi a0, a0, 42 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %shl, i32 noundef %shl, i32 noundef %shl) + ret void +} + +define void @t7(i32 noundef %a, i32 noundef %b) #0 { +; CHECK-LABEL: t7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a2, a0, 42 +; CHECK-NEXT: add a0, a2, a1 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add, i32 noundef %add, i32 noundef %add) + ret void +} + +define void @t8(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh3add a2, a0, a2 +; CHECK-NEXT: sh3add a1, a0, a1 +; CHECK-NEXT: lui a4, 1 +; CHECK-NEXT: addi a4, a4, 1307 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: sh3add a3, a0, a3 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 3 + %add = add nsw i32 %shl, 5403 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, %c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +define void @t9(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t9: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: sh2add a2, a0, a2 +; CHECK-NEXT: sh2add a1, a0, a1 +; CHECK-NEXT: addi a1, a1, -42 +; CHECK-NEXT: addi a2, a2, -42 +; CHECK-NEXT: sh2add a3, a0, a3 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, 2 + %add = add nsw i32 %shl, -42 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, %c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +define void @t10(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d) #0 { +; CHECK-LABEL: t10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: tail callee +entry: + %shl = shl i32 %a, -2 + %add = add nsw i32 %shl, 42 + %add1 = add nsw i32 %add, %b + %add3 = add nsw i32 %add, %c + %add5 = add nsw i32 %shl, %d + %call = tail call i32 @callee(i32 noundef %add1, i32 noundef %add1, i32 noundef %add3, i32 noundef %add5) + ret void +} + +attributes #0 = { nounwind optsize } diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll index 3bbdd1a257fdb..4e5f6e0f65489 100644 --- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare-asm.ll @@ -42,3 +42,459 @@ vector.body: exit: ret float %acc } + +define i32 @vp_reduce_add(ptr %a) { +; CHECK-LABEL: vp_reduce_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB1_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredsum.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB1_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: 
vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.add.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_and(ptr %a) { +; CHECK-LABEL: vp_reduce_and: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: lui a2, 524288 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB2_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredand.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB2_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ -2147483648, %entry ], [ %red, %vector.body ] + %evl = tail call 
i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.and.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_or(ptr %a) { +; CHECK-LABEL: vp_reduce_or: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB3_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredor.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB3_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.or.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + 
%remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_xor(ptr %a) { +; CHECK-LABEL: vp_reduce_xor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB4_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredxor.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB4_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.xor.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_smax(ptr %a) { +; CHECK-LABEL: vp_reduce_smax: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li 
a1, 0 +; CHECK-NEXT: lui a2, 524288 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB5_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredmax.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB5_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ -2147483648, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.smax.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_smin(ptr %a) { +; CHECK-LABEL: vp_reduce_smin: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: lui a2, 524288 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB6_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, 
a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredmin.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB6_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 2147483647, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.smin.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_umax(ptr %a) { +; CHECK-LABEL: vp_reduce_umax: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB7_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredmaxu.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB7_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, 
%entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.umax.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_umin(ptr %a) { +; CHECK-LABEL: vp_reduce_umin: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: lui a2, 524288 +; CHECK-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB8_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vredminu.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB8_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ -2147483648, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl 
to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.umin.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define float @vp_reduce_fadd(ptr %a) { +; CHECK-LABEL: vp_reduce_fadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB9_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vfredosum.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB9_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi float [ 0.000000e+00, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4f32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call float @llvm.vp.reduce.fadd.nxv4f32(float %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add 
i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret float %red +} + +define float @vp_reduce_fmax(ptr %a) { +; CHECK-LABEL: vp_reduce_fmax: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB10_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vfredmax.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB10_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi float [ 0.000000e+00, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4f32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call float @llvm.vp.reduce.fmax.nxv4f32(float %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret float %red +} + +define float @vp_reduce_fmin(ptr %a) { +; CHECK-LABEL: vp_reduce_fmin: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetvli a2, zero, 
e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: .LBB11_1: # %vector.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vsetvli a3, a2, e32, m2, ta, ma +; CHECK-NEXT: slli a4, a1, 2 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: sub a2, a2, a3 +; CHECK-NEXT: vfredmin.vs v8, v10, v8 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: bnez a2, .LBB11_1 +; CHECK-NEXT: # %bb.2: # %for.cond.cleanup +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi float [ 0.000000e+00, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4f32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call float @llvm.vp.reduce.fmin.nxv4f32(float %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret float %red +} diff --git a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll index 006fc269050b0..8967fb8bf01ac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll +++ b/llvm/test/CodeGen/RISCV/rvv/riscv-codegenprepare.ll @@ -44,3 +44,487 @@ vector.body: exit: ret float %acc } + +define i32 @vp_reduce_add(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_add( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label 
[[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 0, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.add.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 
@llvm.vp.reduce.add.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_and(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_and( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 -2147483648, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.and.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = 
phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ -2147483648, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.and.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_or(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_or( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 0, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.or.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: 
[[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.or.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_xor(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_xor( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 0, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 
[[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.xor.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.xor.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_smax(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_smax( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; 
CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 -2147483648, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.smax.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ -2147483648, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + 
%red = tail call i32 @llvm.vp.reduce.smax.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_smin(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_smin( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 2147483647, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.smin.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, 
%entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 2147483647, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.smin.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_umax(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_umax( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 0, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.umax.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), 
i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ 0, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.umax.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define i32 @vp_reduce_umin(ptr %a) { +; CHECK-LABEL: define i32 @vp_reduce_umin( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, i32 -2147483648, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 
@llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4i32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call i32 @llvm.vp.reduce.umin.nxv4i32(i32 [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, i32 [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i32 [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi i32 [ -2147483648, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds i32, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4i32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call i32 @llvm.vp.reduce.umin.nxv4i32(i32 %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret i32 %red +} + +define float @vp_reduce_fadd(ptr %a) { +; CHECK-LABEL: define float @vp_reduce_fadd( +; 
CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, float 0.000000e+00, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4f32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call float @llvm.vp.reduce.fadd.nxv4f32(float [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, float [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret float [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi float [ 0.000000e+00, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %scalar.ind + 
%wide.load = tail call @llvm.vp.load.nxv4f32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call float @llvm.vp.reduce.fadd.nxv4f32(float %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret float %red +} + +define float @vp_reduce_fmax(ptr %a) { +; CHECK-LABEL: define float @vp_reduce_fmax( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, float 0.000000e+00, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4f32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call float @llvm.vp.reduce.fmax.nxv4f32(float [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, float [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: 
for.cond.cleanup: +; CHECK-NEXT: ret float [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi float [ 0.000000e+00, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4f32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call float @llvm.vp.reduce.fmax.nxv4f32(float %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret float %red +} + +define float @vp_reduce_fmin(ptr %a) { +; CHECK-LABEL: define float @vp_reduce_fmin( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TRIP_COUNT:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[REMAINING_TRIP_COUNT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_IND:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXT_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi [ insertelement ( poison, float 0.000000e+00, i64 0), [[ENTRY]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[EVL:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TRIP_COUNT]], i32 4, i1 true) +; CHECK-NEXT: [[EVL2:%.*]] = zext i32 [[EVL]] to i64 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[SCALAR_IND]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = tail call @llvm.vp.load.nxv4f32.p0(ptr [[ARRAYIDX6]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: 
[[TMP1:%.*]] = extractelement [[TMP0]], i64 0 +; CHECK-NEXT: [[RED:%.*]] = tail call float @llvm.vp.reduce.fmin.nxv4f32(float [[TMP1]], [[WIDE_LOAD]], splat (i1 true), i32 [[EVL]]) +; CHECK-NEXT: [[REMAINING_TRIP_COUNT]] = sub nuw i64 [[TRIP_COUNT]], [[EVL2]] +; CHECK-NEXT: [[NEXT_IND]] = add i64 [[SCALAR_IND]], [[EVL2]] +; CHECK-NEXT: [[M:%.*]] = icmp eq i64 [[REMAINING_TRIP_COUNT]], 0 +; CHECK-NEXT: [[TMP2]] = insertelement poison, float [[RED]], i64 0 +; CHECK-NEXT: br i1 [[M]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret float [[RED]] +; +entry: + br label %vector.body + +vector.body: ; preds = %vector.body, %entry + %trip.count = phi i64 [ 1024, %entry ], [ %remaining.trip.count, %vector.body ] + %scalar.ind = phi i64 [ 0, %entry ], [ %next.ind, %vector.body ] + %red.phi = phi float [ 0.000000e+00, %entry ], [ %red, %vector.body ] + %evl = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %trip.count, i32 4, i1 true) + %evl2 = zext i32 %evl to i64 + %arrayidx6 = getelementptr inbounds float, ptr %a, i64 %scalar.ind + %wide.load = tail call @llvm.vp.load.nxv4f32.p0(ptr %arrayidx6, splat (i1 true), i32 %evl) + %red = tail call float @llvm.vp.reduce.fmin.nxv4f32(float %red.phi, %wide.load, splat (i1 true), i32 %evl) + %remaining.trip.count = sub nuw i64 %trip.count, %evl2 + %next.ind = add i64 %scalar.ind, %evl2 + %m = icmp eq i64 %remaining.trip.count, 0 + br i1 %m, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body + ret float %red +} diff --git a/llvm/test/CodeGen/Thumb2/tpsoft.ll b/llvm/test/CodeGen/Thumb2/tpsoft.ll index 2454bd0a98580..7222fcfa532c5 100644 --- a/llvm/test/CodeGen/Thumb2/tpsoft.ll +++ b/llvm/test/CodeGen/Thumb2/tpsoft.ll @@ -47,7 +47,7 @@ define arm_aapcs_vfpcc i32 @main() nounwind { ; ELFASM-NEXT: @ %bb.4: ; ELFASM-NEXT: .LCPI0_0: ; ELFASM-NEXT: .Ltmp0: -; ELFASM-NEXT: .long i(GOTTPOFF)-((.LPC0_0+4)-.Ltmp0) +; ELFASM-NEXT: .long 
i(GOTTPOFF)-(.LPC0_0+4-.Ltmp0) entry: %0 = load i32, ptr @i, align 4 switch i32 %0, label %bb2 [ diff --git a/llvm/test/CodeGen/X86/abi-isel.ll b/llvm/test/CodeGen/X86/abi-isel.ll index 9f0d89f20ceb4..2ac392c729d19 100644 --- a/llvm/test/CodeGen/X86/abi-isel.ll +++ b/llvm/test/CodeGen/X86/abi-isel.ll @@ -1656,8 +1656,8 @@ define dso_local void @qux03() nounwind { ; DARWIN-32-PIC-NEXT: calll L18$pb ; DARWIN-32-PIC-NEXT: L18$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_dsrc-L18$pb)+64(%eax), %ecx -; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L18$pb)+64(%eax) +; DARWIN-32-PIC-NEXT: movl _dsrc-L18$pb+64(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl %ecx, _ddst-L18$pb+64(%eax) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: qux03: @@ -1727,7 +1727,7 @@ define dso_local void @qux04() nounwind { ; DARWIN-32-PIC-NEXT: calll L19$pb ; DARWIN-32-PIC-NEXT: L19$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ddst-L19$pb)+64(%eax), %ecx +; DARWIN-32-PIC-NEXT: leal _ddst-L19$pb+64(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L19$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -1807,7 +1807,7 @@ define dso_local void @qux05() nounwind { ; DARWIN-32-PIC-NEXT: calll L20$pb ; DARWIN-32-PIC-NEXT: L20$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_dsrc-L20$pb)+64(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl _dsrc-L20$pb+64(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl _dptr-L20$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax) ; DARWIN-32-PIC-NEXT: retl @@ -1888,8 +1888,8 @@ define dso_local void @qux06() nounwind { ; DARWIN-32-PIC-NEXT: calll L21$pb ; DARWIN-32-PIC-NEXT: L21$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_lsrc-L21$pb)+64(%eax), %ecx -; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L21$pb)+64(%eax) +; DARWIN-32-PIC-NEXT: movl _lsrc-L21$pb+64(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl %ecx, _ldst-L21$pb+64(%eax) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: qux06: @@ -1959,7 +1959,7 @@ define dso_local void 
@qux07() nounwind { ; DARWIN-32-PIC-NEXT: calll L22$pb ; DARWIN-32-PIC-NEXT: L22$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ldst-L22$pb)+64(%eax), %ecx +; DARWIN-32-PIC-NEXT: leal _ldst-L22$pb+64(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L22$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -2039,7 +2039,7 @@ define dso_local void @qux08() nounwind { ; DARWIN-32-PIC-NEXT: calll L23$pb ; DARWIN-32-PIC-NEXT: L23$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_lsrc-L23$pb)+64(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl _lsrc-L23$pb+64(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl _lptr-L23$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %ecx, 64(%eax) ; DARWIN-32-PIC-NEXT: retl @@ -3887,8 +3887,8 @@ define dso_local void @off03(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L42$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_dsrc-L42$pb)+64(%eax,%ecx,4), %edx -; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L42$pb)+64(%eax,%ecx,4) +; DARWIN-32-PIC-NEXT: movl _dsrc-L42$pb+64(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl %edx, _ddst-L42$pb+64(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: off03: @@ -3977,7 +3977,7 @@ define dso_local void @off04(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L43$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_ddst-L43$pb)+64(%eax,%ecx,4), %ecx +; DARWIN-32-PIC-NEXT: leal _ddst-L43$pb+64(%eax,%ecx,4), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L43$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -4068,7 +4068,7 @@ define dso_local void @off05(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L44$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_dsrc-L44$pb)+64(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl _dsrc-L44$pb+64(%eax,%ecx,4), %edx ; DARWIN-32-PIC-NEXT: movl _dptr-L44$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %edx, 
64(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl @@ -4161,8 +4161,8 @@ define dso_local void @off06(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L45$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_lsrc-L45$pb)+64(%eax,%ecx,4), %edx -; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L45$pb)+64(%eax,%ecx,4) +; DARWIN-32-PIC-NEXT: movl _lsrc-L45$pb+64(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl %edx, _ldst-L45$pb+64(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: off06: @@ -4251,7 +4251,7 @@ define dso_local void @off07(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L46$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_ldst-L46$pb)+64(%eax,%ecx,4), %ecx +; DARWIN-32-PIC-NEXT: leal _ldst-L46$pb+64(%eax,%ecx,4), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L46$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -4342,7 +4342,7 @@ define dso_local void @off08(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L47$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_lsrc-L47$pb)+64(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl _lsrc-L47$pb+64(%eax,%ecx,4), %edx ; DARWIN-32-PIC-NEXT: movl _lptr-L47$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %edx, 64(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl @@ -4711,8 +4711,8 @@ define dso_local void @moo03(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: calll L51$pb ; DARWIN-32-PIC-NEXT: L51$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_dsrc-L51$pb)+262144(%eax), %ecx -; DARWIN-32-PIC-NEXT: movl %ecx, (_ddst-L51$pb)+262144(%eax) +; DARWIN-32-PIC-NEXT: movl _dsrc-L51$pb+262144(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl %ecx, _ddst-L51$pb+262144(%eax) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: moo03: @@ -4782,7 +4782,7 @@ define dso_local void @moo04(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: calll L52$pb ; DARWIN-32-PIC-NEXT: L52$pb: ; 
DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ddst-L52$pb)+262144(%eax), %ecx +; DARWIN-32-PIC-NEXT: leal _ddst-L52$pb+262144(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L52$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -4862,7 +4862,7 @@ define dso_local void @moo05(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: calll L53$pb ; DARWIN-32-PIC-NEXT: L53$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_dsrc-L53$pb)+262144(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl _dsrc-L53$pb+262144(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl _dptr-L53$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax) ; DARWIN-32-PIC-NEXT: retl @@ -4943,8 +4943,8 @@ define dso_local void @moo06(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: calll L54$pb ; DARWIN-32-PIC-NEXT: L54$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_lsrc-L54$pb)+262144(%eax), %ecx -; DARWIN-32-PIC-NEXT: movl %ecx, (_ldst-L54$pb)+262144(%eax) +; DARWIN-32-PIC-NEXT: movl _lsrc-L54$pb+262144(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl %ecx, _ldst-L54$pb+262144(%eax) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: moo06: @@ -5014,7 +5014,7 @@ define dso_local void @moo07(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: calll L55$pb ; DARWIN-32-PIC-NEXT: L55$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ldst-L55$pb)+262144(%eax), %ecx +; DARWIN-32-PIC-NEXT: leal _ldst-L55$pb+262144(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L55$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -5094,7 +5094,7 @@ define dso_local void @moo08(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: calll L56$pb ; DARWIN-32-PIC-NEXT: L56$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: movl (_lsrc-L56$pb)+262144(%eax), %ecx +; DARWIN-32-PIC-NEXT: movl _lsrc-L56$pb+262144(%eax), %ecx ; DARWIN-32-PIC-NEXT: movl _lptr-L56$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %ecx, 262144(%eax) ; DARWIN-32-PIC-NEXT: retl @@ -5488,8 +5488,8 @@ define dso_local void @big03(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: 
L60$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_dsrc-L60$pb)+262144(%eax,%ecx,4), %edx -; DARWIN-32-PIC-NEXT: movl %edx, (_ddst-L60$pb)+262144(%eax,%ecx,4) +; DARWIN-32-PIC-NEXT: movl _dsrc-L60$pb+262144(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl %edx, _ddst-L60$pb+262144(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: big03: @@ -5578,7 +5578,7 @@ define dso_local void @big04(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L61$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_ddst-L61$pb)+262144(%eax,%ecx,4), %ecx +; DARWIN-32-PIC-NEXT: leal _ddst-L61$pb+262144(%eax,%ecx,4), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _dptr-L61$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -5669,7 +5669,7 @@ define dso_local void @big05(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L62$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_dsrc-L62$pb)+262144(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl _dsrc-L62$pb+262144(%eax,%ecx,4), %edx ; DARWIN-32-PIC-NEXT: movl _dptr-L62$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl @@ -5762,8 +5762,8 @@ define dso_local void @big06(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L63$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_lsrc-L63$pb)+262144(%eax,%ecx,4), %edx -; DARWIN-32-PIC-NEXT: movl %edx, (_ldst-L63$pb)+262144(%eax,%ecx,4) +; DARWIN-32-PIC-NEXT: movl _lsrc-L63$pb+262144(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl %edx, _ldst-L63$pb+262144(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: big06: @@ -5852,7 +5852,7 @@ define dso_local void @big07(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L64$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal 
(_ldst-L64$pb)+262144(%eax,%ecx,4), %ecx +; DARWIN-32-PIC-NEXT: leal _ldst-L64$pb+262144(%eax,%ecx,4), %ecx ; DARWIN-32-PIC-NEXT: movl %ecx, _lptr-L64$pb(%eax) ; DARWIN-32-PIC-NEXT: retl ; @@ -5943,7 +5943,7 @@ define dso_local void @big08(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L65$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: movl (_lsrc-L65$pb)+262144(%eax,%ecx,4), %edx +; DARWIN-32-PIC-NEXT: movl _lsrc-L65$pb+262144(%eax,%ecx,4), %edx ; DARWIN-32-PIC-NEXT: movl _lptr-L65$pb(%eax), %eax ; DARWIN-32-PIC-NEXT: movl %edx, 262144(%eax,%ecx,4) ; DARWIN-32-PIC-NEXT: retl @@ -7787,7 +7787,7 @@ define dso_local ptr @bat03() nounwind { ; DARWIN-32-PIC-NEXT: calll L93$pb ; DARWIN-32-PIC-NEXT: L93$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_dsrc-L93$pb)+64(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _dsrc-L93$pb+64(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bat03: @@ -7850,7 +7850,7 @@ define dso_local ptr @bat04() nounwind { ; DARWIN-32-PIC-NEXT: calll L94$pb ; DARWIN-32-PIC-NEXT: L94$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ddst-L94$pb)+64(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _ddst-L94$pb+64(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bat04: @@ -7988,7 +7988,7 @@ define dso_local ptr @bat06() nounwind { ; DARWIN-32-PIC-NEXT: calll L96$pb ; DARWIN-32-PIC-NEXT: L96$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_lsrc-L96$pb)+64(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _lsrc-L96$pb+64(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bat06: @@ -8051,7 +8051,7 @@ define dso_local ptr @bat07() nounwind { ; DARWIN-32-PIC-NEXT: calll L97$pb ; DARWIN-32-PIC-NEXT: L97$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ldst-L97$pb)+64(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _ldst-L97$pb+64(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bat07: @@ -8485,7 
+8485,7 @@ define dso_local ptr @bam03() nounwind { ; DARWIN-32-PIC-NEXT: calll L103$pb ; DARWIN-32-PIC-NEXT: L103$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_dsrc-L103$pb)+262144(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _dsrc-L103$pb+262144(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bam03: @@ -8548,7 +8548,7 @@ define dso_local ptr @bam04() nounwind { ; DARWIN-32-PIC-NEXT: calll L104$pb ; DARWIN-32-PIC-NEXT: L104$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ddst-L104$pb)+262144(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _ddst-L104$pb+262144(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bam04: @@ -8686,7 +8686,7 @@ define dso_local ptr @bam06() nounwind { ; DARWIN-32-PIC-NEXT: calll L106$pb ; DARWIN-32-PIC-NEXT: L106$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_lsrc-L106$pb)+262144(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _lsrc-L106$pb+262144(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bam06: @@ -8749,7 +8749,7 @@ define dso_local ptr @bam07() nounwind { ; DARWIN-32-PIC-NEXT: calll L107$pb ; DARWIN-32-PIC-NEXT: L107$pb: ; DARWIN-32-PIC-NEXT: popl %eax -; DARWIN-32-PIC-NEXT: leal (_ldst-L107$pb)+262144(%eax), %eax +; DARWIN-32-PIC-NEXT: leal _ldst-L107$pb+262144(%eax), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: bam07: @@ -9294,7 +9294,7 @@ define dso_local ptr @cat03(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L114$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_dsrc-L114$pb)+64(%eax,%ecx,4), %eax +; DARWIN-32-PIC-NEXT: leal _dsrc-L114$pb+64(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: cat03: @@ -9368,7 +9368,7 @@ define dso_local ptr @cat04(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L115$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_ddst-L115$pb)+64(%eax,%ecx,4), 
%eax +; DARWIN-32-PIC-NEXT: leal _ddst-L115$pb+64(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: cat04: @@ -9523,7 +9523,7 @@ define dso_local ptr @cat06(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L117$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_lsrc-L117$pb)+64(%eax,%ecx,4), %eax +; DARWIN-32-PIC-NEXT: leal _lsrc-L117$pb+64(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: cat06: @@ -9597,7 +9597,7 @@ define dso_local ptr @cat07(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L118$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_ldst-L118$pb)+64(%eax,%ecx,4), %eax +; DARWIN-32-PIC-NEXT: leal _ldst-L118$pb+64(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: cat07: @@ -10153,7 +10153,7 @@ define dso_local ptr @cam03(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L125$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_dsrc-L125$pb)+262144(%eax,%ecx,4), %eax +; DARWIN-32-PIC-NEXT: leal _dsrc-L125$pb+262144(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: cam03: @@ -10227,7 +10227,7 @@ define dso_local ptr @cam04(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L126$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_ddst-L126$pb)+262144(%eax,%ecx,4), %eax +; DARWIN-32-PIC-NEXT: leal _ddst-L126$pb+262144(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: cam04: @@ -10382,7 +10382,7 @@ define dso_local ptr @cam06(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L128$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_lsrc-L128$pb)+262144(%eax,%ecx,4), %eax +; DARWIN-32-PIC-NEXT: leal _lsrc-L128$pb+262144(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; 
DARWIN-64-STATIC-LABEL: cam06: @@ -10456,7 +10456,7 @@ define dso_local ptr @cam07(i64 %i) nounwind { ; DARWIN-32-PIC-NEXT: L129$pb: ; DARWIN-32-PIC-NEXT: popl %eax ; DARWIN-32-PIC-NEXT: movl {{[0-9]+}}(%esp), %ecx -; DARWIN-32-PIC-NEXT: leal (_ldst-L129$pb)+262144(%eax,%ecx,4), %eax +; DARWIN-32-PIC-NEXT: leal _ldst-L129$pb+262144(%eax,%ecx,4), %eax ; DARWIN-32-PIC-NEXT: retl ; ; DARWIN-64-STATIC-LABEL: cam07: diff --git a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll index f8cbd0a6a9ee0..362135cb1808b 100644 --- a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll +++ b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -265,7 +265,7 @@ define void @tf_bug(ptr %ptr) nounwind { ; PIC-NEXT: L4$pb: ; PIC-NEXT: popl %edi ; PIC-NEXT: movl {{[0-9]+}}(%esp), %esi -; PIC-NEXT: movl (_id-L4$pb)+4(%edi), %edx +; PIC-NEXT: movl _id-L4$pb+4(%edi), %edx ; PIC-NEXT: movl _id-L4$pb(%edi), %eax ; PIC-NEXT: .p2align 4 ; PIC-NEXT: LBB4_1: ## %atomicrmw.start diff --git a/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll b/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll index 01f3a6fcab1fb..1d1a010b95573 100644 --- a/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll @@ -36,7 +36,7 @@ define i64 @early_ioremap_pmd(i64 %addr) { ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: jmp .Ltmp1 ; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: .zero (-(((.Ltmp3-.Ltmp4)-(.Ltmp2-.Ltmp0))>0))*((.Ltmp3-.Ltmp4)-(.Ltmp2-.Ltmp0)),144 +; CHECK-NEXT: .zero -((.Ltmp3-.Ltmp4-(.Ltmp2-.Ltmp0))>0)*(.Ltmp3-.Ltmp4-(.Ltmp2-.Ltmp0)),144 ; CHECK-NEXT: .Ltmp5: entry: %0 = tail call i64 asm sideeffect "mov %cr3,$0\0A\09", "=r,=*m,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i64) nonnull @__force_order) diff --git a/llvm/test/CodeGen/X86/catchpad-reuse.ll b/llvm/test/CodeGen/X86/catchpad-reuse.ll index 8f30e806ea85a..163980fddf04f 100644 --- a/llvm/test/CodeGen/X86/catchpad-reuse.ll +++ b/llvm/test/CodeGen/X86/catchpad-reuse.ll @@ 
-19,11 +19,11 @@ ; CHECK: $cppxdata$main: ; CHECK-NEXT: .long 429065506 # MagicNumber ; CHECK-NEXT: .long 4 # MaxState -; CHECK-NEXT: .long ($stateUnwindMap$main)@IMGREL # UnwindMap +; CHECK-NEXT: .long $stateUnwindMap$main@IMGREL # UnwindMap ; CHECK-NEXT: .long 2 # NumTryBlocks -; CHECK-NEXT: .long ($tryMap$main)@IMGREL # TryBlockMap +; CHECK-NEXT: .long $tryMap$main@IMGREL # TryBlockMap ; CHECK-NEXT: .long 5 # IPMapEntries -; CHECK-NEXT: .long ($ip2state$main)@IMGREL # IPToStateXData +; CHECK-NEXT: .long $ip2state$main@IMGREL # IPToStateXData ; CHECK-NEXT: .long 32 # UnwindHelp ; CHECK-NEXT: .long 0 # ESTypeList ; CHECK-NEXT: .long 1 # EHFlags @@ -33,12 +33,12 @@ ; CHECK-NEXT: .long 1 # TryHigh ; CHECK-NEXT: .long 2 # CatchHigh ; CHECK-NEXT: .long 1 # NumCatches -; CHECK-NEXT: .long ($handlerMap$0$main)@IMGREL # HandlerArray +; CHECK-NEXT: .long $handlerMap$0$main@IMGREL # HandlerArray ; CHECK-NEXT: .long 0 # TryLow ; CHECK-NEXT: .long 2 # TryHigh ; CHECK-NEXT: .long 3 # CatchHigh ; CHECK-NEXT: .long 1 # NumCatches -; CHECK-NEXT: .long ($handlerMap$1$main)@IMGREL # HandlerArray +; CHECK-NEXT: .long $handlerMap$1$main@IMGREL # HandlerArray ; CHECK: $handlerMap$0$main: ; CHECK-NEXT: .long 0 # Adjectives diff --git a/llvm/test/CodeGen/X86/constant-pool-partition.ll b/llvm/test/CodeGen/X86/constant-pool-partition.ll new file mode 100644 index 0000000000000..d2c87b7b3fc14 --- /dev/null +++ b/llvm/test/CodeGen/X86/constant-pool-partition.ll @@ -0,0 +1,141 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +; Tests that constant pool hotness is aggregated across the module. The +; static-data-splitter processes data from @cold_func first, two functions +; without profiles secondly, and then @hot_func. Specifically, tests that +; 1. 
If a constant is accessed by hot functions, all constant pools for this +; constant (e.g., from an unprofiled function, or cold function) should have +; .hot suffix. +; 2. Similarly if a constant is accessed by both cold function and un-profiled +; function, constant pools for this constant should not have .unlikely suffix. + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true -data-sections=true \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=true -data-sections=true \ +; RUN: -unique-section-names=true \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -function-sections=false -data-sections=false \ +; RUN: -unique-section-names=false \ +; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always + +;; For function @cold_func +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst8.unlikely,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .quad 0x3eb0000000000000 # double 9.5367431640625E-7 +; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .quad 0x3fc0000000000000 # double 0.125 + +;; For function @unprofiled_func_double +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .quad 0x3fc0000000000000 # double 0.125 + +;; For function @unprofiled_func_float +; CHECK: .section .rodata.cst4,"aM",@progbits,4 +; CHECK-NEXT: .p2align +; CHECK-NEXT: 
.LCPI2_0: +; CHECK-NEXT: .long 0x3e000000 # float 0.125 + +;; For function @hot_func +; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI3_0: +; CHECK-NEXT: .quad 0x3fe5c28f5c28f5c3 # double 0.68000000000000005 +; CHECK-NEXT: .section .rodata.cst16.hot,"aM",@progbits,16 +; CHECK-NEXT: .p2align +; CHECK-NEXT: .LCPI3_1: +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .LCPI3_2: +; CHECK-NEXT: .long 2147484090 # 0x800001ba +; CHECK-NEXT: .long 2147483748 # 0x80000064 +; CHECK-NEXT: .long 2147483648 # 0x80000000 +; CHECK-NEXT: .long 2147483648 # 0x80000000 + +; CHECK: .section .rodata.cst32,"aM",@progbits,32 +; CHECK-NEXT: .globl val + +define double @cold_func(double %x) !prof !16 { + %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) + %y = fmul double %x, 0x3EB0000000000000 + %z = fmul double %y, 0x3fc0000000000000 + ret double %z +} + +define double @unprofiled_func_double(double %x) { + %z = fmul double %x, 0x3fc0000000000000 + ret double %z +} + +define float @unprofiled_func_float(float %x) { + %z = fmul float %x, 0x3fc0000000000000 + ret float %z +} + +define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 { + %2 = tail call i32 (...) 
@func_taking_arbitrary_param(double 6.800000e-01) + %b = icmp ule <4 x i32> %a, + ret <4 x i1> %b +} + +@val = unnamed_addr constant i256 1 + +define i32 @main(i32 %0, ptr %1) !prof !16 { + br label %7 + +5: ; preds = %7 + %x = call double @double_func() + call void @cold_func(double %x) + ret i32 0 + +7: ; preds = %7, %2 + %8 = phi i32 [ 0, %2 ], [ %10, %7 ] + %seed_val = load i256, ptr @val + %9 = call i32 @seed(i256 %seed_val) + call void @hot_func(i32 %9) + %10 = add i32 %8, 1 + %11 = icmp eq i32 %10, 100000 + br i1 %11, label %5, label %7, !prof !18 +} + +declare i32 @seed(i256) +declare double @double_func() +declare i32 @func_taking_arbitrary_param(...) + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460617} +!5 = !{!"MaxCount", i64 849536} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849536} +!8 = !{!"NumCounts", i64 23784} +!9 = !{!"NumFunctions", i64 3301} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15} +!14 = !{i32 990000, i64 166, i32 73} +!15 = !{i32 999999, i64 1, i32 1463} +!16 = !{!"function_entry_count", i64 1} +!17 = !{!"function_entry_count", i64 100000} +!18 = !{!"branch_weights", i32 1, i32 99999} diff --git a/llvm/test/CodeGen/X86/dollar-name.ll b/llvm/test/CodeGen/X86/dollar-name.ll index bc8cf5fb46537..fc9d6a77f66e5 100644 --- a/llvm/test/CodeGen/X86/dollar-name.ll +++ b/llvm/test/CodeGen/X86/dollar-name.ll @@ -5,9 +5,9 @@ @"$qux" = external dso_local global i32 define i32 @"$foo"() nounwind { -; CHECK: movl ($bar), -; CHECK: addl ($qux), -; CHECK: calll ($hen) +; CHECK: movl $bar, +; CHECK: addl $qux, +; CHECK: calll $hen %m = load i32, ptr @"$bar" %n = load i32, ptr @"$qux" %t = add i32 %m, %n diff --git a/llvm/test/CodeGen/X86/inline-asm-i-constraint-i1.ll 
b/llvm/test/CodeGen/X86/inline-asm-i-constraint-i1.ll index 4be7d18f8e66f..02cfec9fdfba6 100644 --- a/llvm/test/CodeGen/X86/inline-asm-i-constraint-i1.ll +++ b/llvm/test/CodeGen/X86/inline-asm-i-constraint-i1.ll @@ -2,7 +2,7 @@ ; Make sure that boolean immediates are properly (zero) extended. ; CHECK: .Ltmp[[N:[0-9]+]]: -; CHECK-NEXT: .quad (42+1)-.Ltmp[[N]] +; CHECK-NEXT: .quad 42+1-.Ltmp[[N]] target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/X86/insert-subvector-broadcast.ll b/llvm/test/CodeGen/X86/insert-subvector-broadcast.ll new file mode 100644 index 0000000000000..47cd752ef80a4 --- /dev/null +++ b/llvm/test/CodeGen/X86/insert-subvector-broadcast.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mcpu=skx | FileCheck %s + +define void @insert_subvector_broadcast_as_blend() { +; CHECK-LABEL: insert_subvector_broadcast_as_blend: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rax), %rax +; CHECK-NEXT: incq %rax +; CHECK-NEXT: vpbroadcastq %rax, %zmm0 +; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 +; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm1 +; CHECK-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %k0 +; CHECK-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k1 +; CHECK-NEXT: kunpckbw %k0, %k1, %k1 +; CHECK-NEXT: vmovdqu8 {{.*#+}} xmm0 {%k1} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; CHECK-NEXT: vmovdqa %xmm0, (%rax) +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %load4 = load i64, ptr poison, align 32 + %add = add i64 %load4, 1 + %insertelement5 = insertelement <16 x i64> zeroinitializer, i64 %add, i64 1 + %shufflevector = shufflevector <16 x i64> %insertelement5, <16 x i64> poison, <16 x i32> + %icmp6 = icmp slt <16 x i64> %shufflevector, + %shufflevector7 = shufflevector <16 x i1> poison, <16 x i1> %icmp6, <16 x i32> + %zext = zext <16 x 
i1> %shufflevector7 to <16 x i8> + store <16 x i8> %zext, ptr poison, align 32 + ret void +} diff --git a/llvm/test/CodeGen/X86/relptr-rodata.ll b/llvm/test/CodeGen/X86/relptr-rodata.ll index 0ca60b8db157a..878151efccc3b 100644 --- a/llvm/test/CodeGen/X86/relptr-rodata.ll +++ b/llvm/test/CodeGen/X86/relptr-rodata.ll @@ -24,7 +24,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: .globl obj ; CHECK: obj: ; CHECK: .long 0 -; CHECK: .long (hidden_func-obj)-4 +; CHECK: .long hidden_func-obj-4 declare hidden void @hidden_func() diff --git a/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll b/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll index 63a3188aaad7b..2c576df1b7549 100644 --- a/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll +++ b/llvm/test/CodeGen/X86/seh-unwind-inline-asm-codegen.ll @@ -42,11 +42,11 @@ declare dso_local void @printf(ptr, ...) ; CHECK-LABEL: $cppxdata$test: ; CHECK-NEXT: .long 429065506 # MagicNumber ; CHECK-NEXT: .long 1 # MaxState -; CHECK-NEXT: .long ($stateUnwindMap$test)@IMGREL # UnwindMap +; CHECK-NEXT: .long $stateUnwindMap$test@IMGREL # UnwindMap ; CHECK-NEXT: .long 0 # NumTryBlocks ; CHECK-NEXT: .long 0 # TryBlockMap ; CHECK-NEXT: .long 3 # IPMapEntries -; CHECK-NEXT: .long ($ip2state$test)@IMGREL # IPToStateXData +; CHECK-NEXT: .long $ip2state$test@IMGREL # IPToStateXData ; CHECK-NEXT: .long 40 # UnwindHelp ; CHECK-NEXT: .long 0 # ESTypeList ; CHECK-NEXT: .long 1 # EHFlags diff --git a/llvm/test/CodeGen/X86/stack-coloring-wineh.ll b/llvm/test/CodeGen/X86/stack-coloring-wineh.ll index 198f1bf198620..e2de2ff4a392e 100644 --- a/llvm/test/CodeGen/X86/stack-coloring-wineh.ll +++ b/llvm/test/CodeGen/X86/stack-coloring-wineh.ll @@ -96,7 +96,7 @@ define void @pr66984(ptr %arg) personality ptr @__CxxFrameHandler3 { ; X86_64-NEXT: .seh_endepilogue ; X86_64-NEXT: retq ; X86_64-NEXT: .seh_handlerdata -; X86_64-NEXT: .long ($cppxdata$pr66984)@IMGREL +; X86_64-NEXT: .long $cppxdata$pr66984@IMGREL ; X86_64-NEXT: .text ; 
X86_64-NEXT: .seh_endproc ; X86_64-NEXT: .def "?catch$2@?0?pr66984@4HA"; @@ -124,7 +124,7 @@ define void @pr66984(ptr %arg) personality ptr @__CxxFrameHandler3 { ; X86_64-NEXT: .seh_endepilogue ; X86_64-NEXT: retq # CATCHRET ; X86_64-NEXT: .seh_handlerdata -; X86_64-NEXT: .long ($cppxdata$pr66984)@IMGREL +; X86_64-NEXT: .long $cppxdata$pr66984@IMGREL ; X86_64-NEXT: .text ; X86_64-NEXT: .seh_endproc ; X86_64-NEXT: .def "?dtor$4@?0?pr66984@4HA"; diff --git a/llvm/test/CodeGen/X86/tailcc-ssp.ll b/llvm/test/CodeGen/X86/tailcc-ssp.ll index 914af1466147a..5211e4fe9eef9 100644 --- a/llvm/test/CodeGen/X86/tailcc-ssp.ll +++ b/llvm/test/CodeGen/X86/tailcc-ssp.ll @@ -101,3 +101,24 @@ define void @tailcall_unrelated_frame() sspreq { tail call void @bar() ret void } + +declare void @callee() +define void @caller() sspreq { +; WINDOWS-LABEL: caller: +; WINDOWS: callq callee +; WINDOWS: callq callee +; WINDOWS: cmpq __security_cookie(%rip), %rcx +; WINDOWS: jne +; WINDOWS: callq __security_check_cookie + +; LINUX-LABEL: caller: +; LINUX: callq callee@PLT +; LINUX: callq callee@PLT +; LINUX: cmpq +; LINUX: jne +; LINUX: callq __stack_chk_fail@PLT + + tail call void @callee() + call void @callee() + ret void +} diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll index 0df63422b5d84..e4fa594f3dd72 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll @@ -449,9 +449,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,1,3,4,5,6,7] ; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7] -; AVX512-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; AVX512-NEXT: vmovq %xmm0, 32(%r9) -; AVX512-NEXT: vmovdqa %ymm1, (%r9) +; AVX512-NEXT: vmovdqa %ymm2, 
(%r9) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq ; @@ -476,9 +475,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,6,7,14,15,u,u,8,9,10,11,12,13,14,15] ; AVX512-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7] -; AVX512-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; AVX512-FCP-NEXT: vmovq %xmm0, 32(%r9) -; AVX512-FCP-NEXT: vmovdqa %ymm1, (%r9) +; AVX512-FCP-NEXT: vmovdqa %ymm2, (%r9) ; AVX512-FCP-NEXT: vzeroupper ; AVX512-FCP-NEXT: retq ; @@ -504,9 +502,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,1,3,4,5,6,7] ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7] -; AVX512DQ-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; AVX512DQ-NEXT: vmovq %xmm0, 32(%r9) -; AVX512DQ-NEXT: vmovdqa %ymm1, (%r9) +; AVX512DQ-NEXT: vmovdqa %ymm2, (%r9) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -531,9 +528,8 @@ define void @store_i16_stride5_vf4(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,6,7,14,15,u,u,8,9,10,11,12,13,14,15] ; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3],xmm0[4,5,6,7] -; AVX512DQ-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; AVX512DQ-FCP-NEXT: vmovq %xmm0, 32(%r9) -; AVX512DQ-FCP-NEXT: vmovdqa %ymm1, (%r9) +; AVX512DQ-FCP-NEXT: vmovdqa %ymm2, (%r9) ; AVX512DQ-FCP-NEXT: vzeroupper ; AVX512DQ-FCP-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll index bc08f57e5faac..e4e013446f7a5 100644 --- 
a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll @@ -1380,29 +1380,28 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm11 ; AVX512-NEXT: vpternlogq {{.*#+}} zmm11 = (zmm11 & mem) | zmm10 ; AVX512-NEXT: vpternlogq {{.*#+}} zmm11 = zmm11 ^ (mem & (zmm11 ^ zmm9)) -; AVX512-NEXT: vpsrlq $48, %xmm4, %xmm4 -; AVX512-NEXT: vpunpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1] +; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] +; AVX512-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u],zero,zero,zero,zero,ymm2[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm2[20,21,28,29,u,u,u,u] +; AVX512-NEXT: vpermq {{.*#+}} ymm7 = ymm7[1,3,1,3] +; AVX512-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm7[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm7[u,u,u,u] +; AVX512-NEXT: vpor %ymm2, %ymm7, %ymm2 +; AVX512-NEXT: vpermq {{.*#+}} ymm7 = ymm8[1,3,3,1] +; AVX512-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21] +; AVX512-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] +; AVX512-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7 +; AVX512-NEXT: vpternlogq {{.*#+}} ymm8 = ymm2 ^ (mem & (ymm8 ^ ymm2)) +; AVX512-NEXT: vpsrlq $48, %xmm4, %xmm2 +; AVX512-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm3[1],xmm2[1] ; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4],xmm0[5,6,7] +; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[3,4],xmm0[5,6,7] ; AVX512-NEXT: vpsrld $16, %xmm6, %xmm1 ; AVX512-NEXT: vpunpckhdq {{.*#+}} xmm1 = 
xmm5[2],xmm1[2],xmm5[3],xmm1[3] -; AVX512-NEXT: vpbroadcastd 12(%r10), %xmm3 -; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3,4],xmm1[5,6],xmm3[7] +; AVX512-NEXT: vpbroadcastd 12(%r10), %xmm2 +; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4],xmm1[5,6],xmm2[7] ; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4],xmm1[5,6,7] -; AVX512-NEXT: vpermq {{.*#+}} ymm1 = ymm2[1,3,1,3] -; AVX512-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u],zero,zero,zero,zero,ymm1[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm1[20,21,28,29,u,u,u,u] -; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm7[1,3,1,3] -; AVX512-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm2[u,u,u,u] -; AVX512-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm8[1,3,3,1] -; AVX512-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,8,9],zero,zero,ymm2[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm2[u,u,u,u,u,u,u,u,28,29,20,21] -; AVX512-NEXT: vmovdqa {{.*#+}} ymm3 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] -; AVX512-NEXT: vpternlogd {{.*#+}} ymm3 = (mem & ~ymm3) | ymm2 -; AVX512-NEXT: vpternlogq {{.*#+}} ymm3 = ymm1 ^ (mem & (ymm3 ^ ymm1)) -; AVX512-NEXT: vinserti32x4 $2, %xmm0, %zmm3, %zmm1 ; AVX512-NEXT: vmovdqa %xmm0, 96(%rax) -; AVX512-NEXT: vmovdqa %ymm1, 64(%rax) +; AVX512-NEXT: vmovdqa %ymm8, 64(%rax) ; AVX512-NEXT: vmovdqa64 %zmm11, (%rax) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1420,6 +1419,37 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm6 ; AVX512-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm7 ; AVX512-FCP-NEXT: vinserti128 $1, %xmm5, %ymm4, %ymm8 +; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm9 = ymm7[0,2,0,2] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm9 = zero,zero,zero,zero,ymm9[0,1,8,9,u,u,u,u,u,u],zero,zero,zero,zero,ymm9[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero 
+; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm10 = ymm6[0,2,1,3] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm10 = zero,zero,zero,zero,ymm10[u,u,u,u,u,u,6,7,14,15],zero,zero,zero,zero,ymm10[u,u,u,u,u,u,16,17,24,25],zero,zero,zero,zero +; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm10, %zmm9, %zmm9 +; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm10 = ymm6[0,2,2,0] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm10 = ymm10[0,1,8,9],zero,zero,zero,zero,ymm10[u,u,u,u,u,u,2,3,18,19],zero,zero,zero,zero,ymm10[u,u,u,u,u,u,28,29,20,21] +; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} ymm11 = [1,5,0,0,5,2,6,0] +; AVX512-FCP-NEXT: vpermd %ymm7, %ymm11, %ymm11 +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm11 = ymm11[0,1,4,5,u,u,u,u,u,u],zero,zero,zero,zero,ymm11[2,3,18,19,u,u,u,u,u,u],zero,zero,zero,zero,ymm11[20,21,24,25] +; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm11, %zmm10, %zmm10 +; AVX512-FCP-NEXT: vporq %zmm9, %zmm10, %zmm9 +; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm10 = ymm8[0,2,0,2] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm11 = ymm10[u,u,u,u,u,u,u,u,0,1,8,9],zero,zero,ymm10[u,u,u,u,u,u,u,u,18,19,26,27],zero,zero,ymm10[u,u,u,u] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm10 = ymm10[u,u,u,u,4,5,12,13],zero,zero,ymm10[u,u,u,u,u,u,u,u,22,23,30,31],zero,zero,ymm10[u,u,u,u,u,u,u,u] +; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm10, %zmm11, %zmm10 +; AVX512-FCP-NEXT: vpbroadcastd (%r10), %ymm11 +; AVX512-FCP-NEXT: vpbroadcastd 4(%r10), %ymm12 +; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm11 +; AVX512-FCP-NEXT: vpternlogq {{.*#+}} zmm11 = (zmm11 & mem) | zmm10 +; AVX512-FCP-NEXT: vpternlogq {{.*#+}} zmm11 = zmm11 ^ (mem & (zmm11 ^ zmm9)) +; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm7[1,3,1,3] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[u,u,u,u,u,u],zero,zero,zero,zero,ymm7[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm7[20,21,28,29,u,u,u,u] +; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm6 = ymm6[1,3,1,3] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm6 = 
ymm6[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm6[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm6[u,u,u,u] +; AVX512-FCP-NEXT: vpor %ymm7, %ymm6, %ymm6 +; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm8[1,3,3,1] +; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21] +; AVX512-FCP-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] +; AVX512-FCP-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7 +; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm8 = ymm6 ^ (mem & (ymm8 ^ ymm6)) ; AVX512-FCP-NEXT: vpsrlq $48, %xmm3, %xmm3 ; AVX512-FCP-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm3[1] ; AVX512-FCP-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] @@ -1430,41 +1460,9 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512-FCP-NEXT: vpbroadcastd 12(%r10), %xmm2 ; AVX512-FCP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4],xmm1[5,6],xmm2[7] ; AVX512-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4],xmm1[5,6,7] -; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm1 = ymm7[1,3,1,3] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u],zero,zero,zero,zero,ymm1[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm1[20,21,28,29,u,u,u,u] -; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm6[1,3,1,3] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm2[u,u,u,u] -; AVX512-FCP-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm8[1,3,3,1] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,8,9],zero,zero,ymm2[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm2[u,u,u,u,u,u,u,u,28,29,20,21] -; AVX512-FCP-NEXT: vmovdqa {{.*#+}} ymm3 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] -; AVX512-FCP-NEXT: vpternlogd 
{{.*#+}} ymm3 = (mem & ~ymm3) | ymm2 -; AVX512-FCP-NEXT: vpternlogq {{.*#+}} ymm3 = ymm1 ^ (mem & (ymm3 ^ ymm1)) -; AVX512-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm3, %zmm1 -; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm7[0,2,0,2] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,zero,zero,ymm2[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero -; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm3 = ymm6[0,2,1,3] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,zero,zero,ymm3[u,u,u,u,u,u,6,7,14,15],zero,zero,zero,zero,ymm3[u,u,u,u,u,u,16,17,24,25],zero,zero,zero,zero -; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm3 = ymm6[0,2,2,0] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,1,8,9],zero,zero,zero,zero,ymm3[u,u,u,u,u,u,2,3,18,19],zero,zero,zero,zero,ymm3[u,u,u,u,u,u,28,29,20,21] -; AVX512-FCP-NEXT: vpmovsxbd {{.*#+}} ymm4 = [1,5,0,0,5,2,6,0] -; AVX512-FCP-NEXT: vpermd %ymm7, %ymm4, %ymm4 -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[0,1,4,5,u,u,u,u,u,u],zero,zero,zero,zero,ymm4[2,3,18,19,u,u,u,u,u,u],zero,zero,zero,zero,ymm4[20,21,24,25] -; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512-FCP-NEXT: vporq %zmm2, %zmm3, %zmm2 -; AVX512-FCP-NEXT: vpermq {{.*#+}} ymm3 = ymm8[0,2,0,2] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[u,u,u,u,u,u,u,u,0,1,8,9],zero,zero,ymm3[u,u,u,u,u,u,u,u,18,19,26,27],zero,zero,ymm3[u,u,u,u] -; AVX512-FCP-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,u,u,4,5,12,13],zero,zero,ymm3[u,u,u,u,u,u,u,u,22,23,30,31],zero,zero,ymm3[u,u,u,u,u,u,u,u] -; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512-FCP-NEXT: vpbroadcastd (%r10), %ymm4 -; AVX512-FCP-NEXT: vpbroadcastd 4(%r10), %ymm5 -; AVX512-FCP-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4 -; AVX512-FCP-NEXT: vpternlogq {{.*#+}} zmm4 = (zmm4 & mem) | zmm3 -; AVX512-FCP-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 ^ (mem & (zmm4 ^ zmm2)) ; AVX512-FCP-NEXT: vmovdqa %xmm0, 96(%rax) -; 
AVX512-FCP-NEXT: vmovdqa64 %zmm4, (%rax) -; AVX512-FCP-NEXT: vmovdqa %ymm1, 64(%rax) +; AVX512-FCP-NEXT: vmovdqa %ymm8, 64(%rax) +; AVX512-FCP-NEXT: vmovdqa64 %zmm11, (%rax) ; AVX512-FCP-NEXT: vzeroupper ; AVX512-FCP-NEXT: retq ; @@ -1505,29 +1503,28 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm11 ; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm11 = (zmm11 & mem) | zmm10 ; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm11 = zmm11 ^ (mem & (zmm11 ^ zmm9)) -; AVX512DQ-NEXT: vpsrlq $48, %xmm4, %xmm4 -; AVX512DQ-NEXT: vpunpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1] +; AVX512DQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] +; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u],zero,zero,zero,zero,ymm2[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm2[20,21,28,29,u,u,u,u] +; AVX512DQ-NEXT: vpermq {{.*#+}} ymm7 = ymm7[1,3,1,3] +; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm7[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm7[u,u,u,u] +; AVX512DQ-NEXT: vpor %ymm2, %ymm7, %ymm2 +; AVX512DQ-NEXT: vpermq {{.*#+}} ymm7 = ymm8[1,3,3,1] +; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21] +; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] +; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7 +; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm8 = ymm2 ^ (mem & (ymm8 ^ ymm2)) +; AVX512DQ-NEXT: vpsrlq $48, %xmm4, %xmm2 +; AVX512DQ-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm3[1],xmm2[1] ; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; AVX512DQ-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4],xmm0[5,6,7] +; AVX512DQ-NEXT: 
vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[3,4],xmm0[5,6,7] ; AVX512DQ-NEXT: vpsrld $16, %xmm6, %xmm1 ; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm5[2],xmm1[2],xmm5[3],xmm1[3] -; AVX512DQ-NEXT: vpbroadcastd 12(%r10), %xmm3 -; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3,4],xmm1[5,6],xmm3[7] +; AVX512DQ-NEXT: vpbroadcastd 12(%r10), %xmm2 +; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4],xmm1[5,6],xmm2[7] ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4],xmm1[5,6,7] -; AVX512DQ-NEXT: vpermq {{.*#+}} ymm1 = ymm2[1,3,1,3] -; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u],zero,zero,zero,zero,ymm1[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm1[20,21,28,29,u,u,u,u] -; AVX512DQ-NEXT: vpermq {{.*#+}} ymm2 = ymm7[1,3,1,3] -; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm2[u,u,u,u] -; AVX512DQ-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX512DQ-NEXT: vpermq {{.*#+}} ymm2 = ymm8[1,3,3,1] -; AVX512DQ-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,8,9],zero,zero,ymm2[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm2[u,u,u,u,u,u,u,u,28,29,20,21] -; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] -; AVX512DQ-NEXT: vpternlogd {{.*#+}} ymm3 = (mem & ~ymm3) | ymm2 -; AVX512DQ-NEXT: vpternlogq {{.*#+}} ymm3 = ymm1 ^ (mem & (ymm3 ^ ymm1)) -; AVX512DQ-NEXT: vinserti32x4 $2, %xmm0, %zmm3, %zmm1 ; AVX512DQ-NEXT: vmovdqa %xmm0, 96(%rax) -; AVX512DQ-NEXT: vmovdqa %ymm1, 64(%rax) +; AVX512DQ-NEXT: vmovdqa %ymm8, 64(%rax) ; AVX512DQ-NEXT: vmovdqa64 %zmm11, (%rax) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -1545,6 +1542,37 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm6 ; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm7 ; AVX512DQ-FCP-NEXT: vinserti128 $1, %xmm5, %ymm4, %ymm8 +; AVX512DQ-FCP-NEXT: 
vpermq {{.*#+}} ymm9 = ymm7[0,2,0,2] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm9 = zero,zero,zero,zero,ymm9[0,1,8,9,u,u,u,u,u,u],zero,zero,zero,zero,ymm9[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero +; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm10 = ymm6[0,2,1,3] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm10 = zero,zero,zero,zero,ymm10[u,u,u,u,u,u,6,7,14,15],zero,zero,zero,zero,ymm10[u,u,u,u,u,u,16,17,24,25],zero,zero,zero,zero +; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm10, %zmm9, %zmm9 +; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm10 = ymm6[0,2,2,0] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm10 = ymm10[0,1,8,9],zero,zero,zero,zero,ymm10[u,u,u,u,u,u,2,3,18,19],zero,zero,zero,zero,ymm10[u,u,u,u,u,u,28,29,20,21] +; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} ymm11 = [1,5,0,0,5,2,6,0] +; AVX512DQ-FCP-NEXT: vpermd %ymm7, %ymm11, %ymm11 +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm11 = ymm11[0,1,4,5,u,u,u,u,u,u],zero,zero,zero,zero,ymm11[2,3,18,19,u,u,u,u,u,u],zero,zero,zero,zero,ymm11[20,21,24,25] +; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm11, %zmm10, %zmm10 +; AVX512DQ-FCP-NEXT: vporq %zmm9, %zmm10, %zmm9 +; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm10 = ymm8[0,2,0,2] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm11 = ymm10[u,u,u,u,u,u,u,u,0,1,8,9],zero,zero,ymm10[u,u,u,u,u,u,u,u,18,19,26,27],zero,zero,ymm10[u,u,u,u] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm10 = ymm10[u,u,u,u,4,5,12,13],zero,zero,ymm10[u,u,u,u,u,u,u,u,22,23,30,31],zero,zero,ymm10[u,u,u,u,u,u,u,u] +; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm10, %zmm11, %zmm10 +; AVX512DQ-FCP-NEXT: vpbroadcastd (%r10), %ymm11 +; AVX512DQ-FCP-NEXT: vpbroadcastd 4(%r10), %ymm12 +; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm12, %zmm11, %zmm11 +; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} zmm11 = (zmm11 & mem) | zmm10 +; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} zmm11 = zmm11 ^ (mem & (zmm11 ^ zmm9)) +; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm7[1,3,1,3] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm7 = 
ymm7[u,u,u,u,u,u],zero,zero,zero,zero,ymm7[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm7[20,21,28,29,u,u,u,u] +; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm6 = ymm6[1,3,1,3] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm6 = ymm6[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm6[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm6[u,u,u,u] +; AVX512DQ-FCP-NEXT: vpor %ymm7, %ymm6, %ymm6 +; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm8[1,3,3,1] +; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[0,1,8,9],zero,zero,ymm7[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm7[u,u,u,u,u,u,u,u,28,29,20,21] +; AVX512DQ-FCP-NEXT: vmovdqa {{.*#+}} ymm8 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] +; AVX512DQ-FCP-NEXT: vpternlogd {{.*#+}} ymm8 = (mem & ~ymm8) | ymm7 +; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm8 = ymm6 ^ (mem & (ymm8 ^ ymm6)) ; AVX512DQ-FCP-NEXT: vpsrlq $48, %xmm3, %xmm3 ; AVX512DQ-FCP-NEXT: vpunpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm3[1] ; AVX512DQ-FCP-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] @@ -1555,41 +1583,9 @@ define void @store_i16_stride7_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-FCP-NEXT: vpbroadcastd 12(%r10), %xmm2 ; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4],xmm1[5,6],xmm2[7] ; AVX512DQ-FCP-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4],xmm1[5,6,7] -; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm1 = ymm7[1,3,1,3] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u],zero,zero,zero,zero,ymm1[2,3,10,11,u,u,u,u,u,u],zero,zero,zero,zero,ymm1[20,21,28,29,u,u,u,u] -; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm6[1,3,1,3] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,2,3,10,11],zero,zero,zero,zero,ymm2[u,u,u,u,u,u,20,21,28,29],zero,zero,zero,zero,ymm2[u,u,u,u] -; AVX512DQ-FCP-NEXT: vpor %ymm1, %ymm2, %ymm1 -; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm8[1,3,3,1] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm2[0,1,8,9],zero,zero,ymm2[u,u,u,u,u,u,u,u,2,3,18,19],zero,zero,ymm2[u,u,u,u,u,u,u,u,28,29,20,21] -; AVX512DQ-FCP-NEXT: vmovdqa {{.*#+}} ymm3 = [65535,65535,0,65535,65535,65535,65535,65535,65535,0,65535,65535,65535,65535,65535,65535] -; AVX512DQ-FCP-NEXT: vpternlogd {{.*#+}} ymm3 = (mem & ~ymm3) | ymm2 -; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} ymm3 = ymm1 ^ (mem & (ymm3 ^ ymm1)) -; AVX512DQ-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm3, %zmm1 -; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm7[0,2,0,2] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm2[0,1,8,9,u,u,u,u,u,u],zero,zero,zero,zero,ymm2[18,19,26,27,u,u,u,u,u,u],zero,zero,zero,zero -; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm3 = ymm6[0,2,1,3] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,zero,zero,ymm3[u,u,u,u,u,u,6,7,14,15],zero,zero,zero,zero,ymm3[u,u,u,u,u,u,16,17,24,25],zero,zero,zero,zero -; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm3 = ymm6[0,2,2,0] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,1,8,9],zero,zero,zero,zero,ymm3[u,u,u,u,u,u,2,3,18,19],zero,zero,zero,zero,ymm3[u,u,u,u,u,u,28,29,20,21] -; AVX512DQ-FCP-NEXT: vpmovsxbd {{.*#+}} ymm4 = [1,5,0,0,5,2,6,0] -; AVX512DQ-FCP-NEXT: vpermd %ymm7, %ymm4, %ymm4 -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[0,1,4,5,u,u,u,u,u,u],zero,zero,zero,zero,ymm4[2,3,18,19,u,u,u,u,u,u],zero,zero,zero,zero,ymm4[20,21,24,25] -; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 -; AVX512DQ-FCP-NEXT: vporq %zmm2, %zmm3, %zmm2 -; AVX512DQ-FCP-NEXT: vpermq {{.*#+}} ymm3 = ymm8[0,2,0,2] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[u,u,u,u,u,u,u,u,0,1,8,9],zero,zero,ymm3[u,u,u,u,u,u,u,u,18,19,26,27],zero,zero,ymm3[u,u,u,u] -; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,u,u,4,5,12,13],zero,zero,ymm3[u,u,u,u,u,u,u,u,22,23,30,31],zero,zero,ymm3[u,u,u,u,u,u,u,u] -; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512DQ-FCP-NEXT: vpbroadcastd (%r10), %ymm4 
-; AVX512DQ-FCP-NEXT: vpbroadcastd 4(%r10), %ymm5 -; AVX512DQ-FCP-NEXT: vinserti64x4 $1, %ymm5, %zmm4, %zmm4 -; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} zmm4 = (zmm4 & mem) | zmm3 -; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 ^ (mem & (zmm4 ^ zmm2)) ; AVX512DQ-FCP-NEXT: vmovdqa %xmm0, 96(%rax) -; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm4, (%rax) -; AVX512DQ-FCP-NEXT: vmovdqa %ymm1, 64(%rax) +; AVX512DQ-FCP-NEXT: vmovdqa %ymm8, 64(%rax) +; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm11, (%rax) ; AVX512DQ-FCP-NEXT: vzeroupper ; AVX512DQ-FCP-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll index 75f8469c266b1..39f8a93a7b77a 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll @@ -725,9 +725,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512-NEXT: vmovd %eax, %xmm1 ; AVX512-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 ^ (mem & (xmm1 ^ xmm0)) -; AVX512-NEXT: vinserti32x4 $2, %xmm1, %zmm3, %zmm0 ; AVX512-NEXT: vmovq %xmm1, 32(%r9) -; AVX512-NEXT: vmovdqa %ymm0, (%r9) +; AVX512-NEXT: vmovdqa %ymm3, (%r9) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq ; @@ -756,9 +755,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512-FCP-NEXT: vmovd %eax, %xmm1 ; AVX512-FCP-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512-FCP-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 ^ (mem & (xmm1 ^ xmm0)) -; AVX512-FCP-NEXT: vinserti32x4 $2, %xmm1, %zmm3, %zmm0 ; AVX512-FCP-NEXT: vmovq %xmm1, 32(%r9) -; AVX512-FCP-NEXT: vmovdqa %ymm0, (%r9) +; AVX512-FCP-NEXT: vmovdqa %ymm3, (%r9) ; AVX512-FCP-NEXT: vzeroupper ; AVX512-FCP-NEXT: retq ; @@ -787,9 +785,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512DQ-NEXT: vmovd %eax, %xmm1 ; AVX512DQ-NEXT: vpbroadcastw %xmm1, %xmm1 ; 
AVX512DQ-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 ^ (mem & (xmm1 ^ xmm0)) -; AVX512DQ-NEXT: vinserti32x4 $2, %xmm1, %zmm3, %zmm0 ; AVX512DQ-NEXT: vmovq %xmm1, 32(%r9) -; AVX512DQ-NEXT: vmovdqa %ymm0, (%r9) +; AVX512DQ-NEXT: vmovdqa %ymm3, (%r9) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -818,9 +815,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512DQ-FCP-NEXT: vmovd %eax, %xmm1 ; AVX512DQ-FCP-NEXT: vpbroadcastw %xmm1, %xmm1 ; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 ^ (mem & (xmm1 ^ xmm0)) -; AVX512DQ-FCP-NEXT: vinserti32x4 $2, %xmm1, %zmm3, %zmm0 ; AVX512DQ-FCP-NEXT: vmovq %xmm1, 32(%r9) -; AVX512DQ-FCP-NEXT: vmovdqa %ymm0, (%r9) +; AVX512DQ-FCP-NEXT: vmovdqa %ymm3, (%r9) ; AVX512DQ-FCP-NEXT: vzeroupper ; AVX512DQ-FCP-NEXT: retq ; @@ -852,9 +848,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512BW-NEXT: movw $132, %ax ; AVX512BW-NEXT: kmovd %eax, %k1 ; AVX512BW-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} -; AVX512BW-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; AVX512BW-NEXT: vmovq %xmm0, 32(%r9) -; AVX512BW-NEXT: vmovdqa %ymm1, (%r9) +; AVX512BW-NEXT: vmovdqa %ymm2, (%r9) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -886,9 +881,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512BW-FCP-NEXT: movw $132, %ax ; AVX512BW-FCP-NEXT: kmovd %eax, %k1 ; AVX512BW-FCP-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} -; AVX512BW-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; AVX512BW-FCP-NEXT: vmovq %xmm0, 32(%r9) -; AVX512BW-FCP-NEXT: vmovdqa %ymm1, (%r9) +; AVX512BW-FCP-NEXT: vmovdqa %ymm2, (%r9) ; AVX512BW-FCP-NEXT: vzeroupper ; AVX512BW-FCP-NEXT: retq ; @@ -920,9 +914,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512DQ-BW-NEXT: movw $132, %ax ; AVX512DQ-BW-NEXT: kmovd %eax, %k1 ; AVX512DQ-BW-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} -; AVX512DQ-BW-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; 
AVX512DQ-BW-NEXT: vmovq %xmm0, 32(%r9) -; AVX512DQ-BW-NEXT: vmovdqa %ymm1, (%r9) +; AVX512DQ-BW-NEXT: vmovdqa %ymm2, (%r9) ; AVX512DQ-BW-NEXT: vzeroupper ; AVX512DQ-BW-NEXT: retq ; @@ -954,9 +947,8 @@ define void @store_i8_stride5_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512DQ-BW-FCP-NEXT: movw $132, %ax ; AVX512DQ-BW-FCP-NEXT: kmovd %eax, %k1 ; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm2, %zmm1 ; AVX512DQ-BW-FCP-NEXT: vmovq %xmm0, 32(%r9) -; AVX512DQ-BW-FCP-NEXT: vmovdqa %ymm1, (%r9) +; AVX512DQ-BW-FCP-NEXT: vmovdqa %ymm2, (%r9) ; AVX512DQ-BW-FCP-NEXT: vzeroupper ; AVX512DQ-BW-FCP-NEXT: retq %in.vec0 = load <8 x i8>, ptr %in.vecptr0, align 64 diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll index 328d55ca8d627..b82e663528398 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll @@ -2098,10 +2098,9 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = zero,xmm0[13,u,u,u,u],zero,zero,xmm0[14,u,u,u,u],zero,zero,xmm0[15] ; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm3)) -; AVX512-NEXT: vinserti32x4 $2, %xmm0, %zmm5, %zmm1 +; AVX512-NEXT: vmovdqa %ymm5, 64(%rax) ; AVX512-NEXT: vmovdqa %xmm0, 96(%rax) ; AVX512-NEXT: vmovdqa64 %zmm8, (%rax) -; AVX512-NEXT: vmovdqa %ymm1, 64(%rax) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq ; @@ -2164,10 +2163,9 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512-FCP-NEXT: vpshufb {{.*#+}} xmm0 = zero,xmm0[13,u,u,u,u],zero,zero,xmm0[14,u,u,u,u],zero,zero,xmm0[15] ; AVX512-FCP-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX512-FCP-NEXT: vpternlogq {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm3)) -; AVX512-FCP-NEXT: vinserti32x4 $2, 
%xmm0, %zmm5, %zmm1 +; AVX512-FCP-NEXT: vmovdqa %ymm5, 64(%rax) ; AVX512-FCP-NEXT: vmovdqa %xmm0, 96(%rax) ; AVX512-FCP-NEXT: vmovdqa64 %zmm7, (%rax) -; AVX512-FCP-NEXT: vmovdqa %ymm1, 64(%rax) ; AVX512-FCP-NEXT: vzeroupper ; AVX512-FCP-NEXT: retq ; @@ -2235,10 +2233,9 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = zero,xmm0[13,u,u,u,u],zero,zero,xmm0[14,u,u,u,u],zero,zero,xmm0[15] ; AVX512DQ-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX512DQ-NEXT: vpternlogq {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm3)) -; AVX512DQ-NEXT: vinserti32x4 $2, %xmm0, %zmm5, %zmm1 +; AVX512DQ-NEXT: vmovdqa %ymm5, 64(%rax) ; AVX512DQ-NEXT: vmovdqa %xmm0, 96(%rax) ; AVX512DQ-NEXT: vmovdqa64 %zmm8, (%rax) -; AVX512DQ-NEXT: vmovdqa %ymm1, 64(%rax) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -2301,10 +2298,9 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} xmm0 = zero,xmm0[13,u,u,u,u],zero,zero,xmm0[14,u,u,u,u],zero,zero,xmm0[15] ; AVX512DQ-FCP-NEXT: vpor %xmm0, %xmm1, %xmm0 ; AVX512DQ-FCP-NEXT: vpternlogq {{.*#+}} xmm0 = xmm0 ^ (mem & (xmm0 ^ xmm3)) -; AVX512DQ-FCP-NEXT: vinserti32x4 $2, %xmm0, %zmm5, %zmm1 +; AVX512DQ-FCP-NEXT: vmovdqa %ymm5, 64(%rax) ; AVX512DQ-FCP-NEXT: vmovdqa %xmm0, 96(%rax) ; AVX512DQ-FCP-NEXT: vmovdqa64 %zmm7, (%rax) -; AVX512DQ-FCP-NEXT: vmovdqa %ymm1, 64(%rax) ; AVX512DQ-FCP-NEXT: vzeroupper ; AVX512DQ-FCP-NEXT: retq ; @@ -2314,80 +2310,79 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512BW-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512BW-NEXT: vmovdqa (%rdx), %xmm1 -; AVX512BW-NEXT: vmovdqa (%r8), %xmm3 -; AVX512BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm2 -; AVX512BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm4 -; AVX512BW-NEXT: vinserti128 $1, (%r9), %ymm3, %ymm0 -; AVX512BW-NEXT: vinserti32x4 $2, (%r10), %zmm0, %zmm0 -; AVX512BW-NEXT: 
vextracti128 $1, %ymm0, %xmm1 -; AVX512BW-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[10,u,u,u,u,u,13,12,u,u,u,u,u,15,14,u] -; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512BW-NEXT: vpshufhw {{.*#+}} xmm5 = xmm1[0,1,2,3,6,7,7,7] -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,1,3,2] +; AVX512BW-NEXT: vmovdqa (%r8), %xmm2 +; AVX512BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0 +; AVX512BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1 +; AVX512BW-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2 +; AVX512BW-NEXT: vinserti32x4 $2, (%r10), %zmm2, %zmm2 +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,zero,zero,zero,ymm1[5],zero,zero,zero,zero,zero,zero,ymm1[6],zero,zero,zero,zero,zero,zero,zero,ymm1[23],zero,zero,zero,zero,zero,zero,ymm1[24],zero,zero,zero,zero +; AVX512BW-NEXT: vpermq {{.*#+}} ymm4 = ymm1[2,3,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm4 = zero,zero,zero,zero,zero,zero,ymm4[5],zero,zero,zero,zero,zero,zero,ymm4[6],zero,zero,zero,zero,zero,ymm4[23],zero,zero,zero,zero,zero,zero,ymm4[24],zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpor %ymm3, %ymm4, %ymm3 +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm4 = zero,zero,zero,ymm0[5],zero,zero,zero,zero,zero,zero,ymm0[6],zero,zero,zero,zero,zero,zero,zero,ymm0[23],zero,zero,zero,zero,zero,zero,ymm0[24],zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpermq {{.*#+}} ymm5 = ymm0[2,3,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm5 = zero,zero,zero,zero,ymm5[5],zero,zero,zero,zero,zero,zero,ymm5[6],zero,zero,zero,zero,zero,ymm5[23],zero,zero,zero,zero,zero,zero,ymm5[24],zero,zero,zero,zero,zero,zero,ymm5[25] +; AVX512BW-NEXT: vpor %ymm5, %ymm4, %ymm4 +; AVX512BW-NEXT: movl $202911840, %ecx # imm = 0xC183060 +; AVX512BW-NEXT: kmovd %ecx, %k1 +; AVX512BW-NEXT: vmovdqu8 %ymm3, %ymm4 {%k1} +; AVX512BW-NEXT: vpermq {{.*#+}} ymm3 = ymm0[0,2,0,2] +; 
AVX512BW-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,8],zero,zero,zero,zero,zero,ymm3[1,9],zero,zero,zero,zero,zero,ymm3[2,10],zero,zero,zero,zero,zero,ymm3[19,27],zero,zero,zero,zero,zero,ymm3[20,28],zero,zero +; AVX512BW-NEXT: vpermq {{.*#+}} ymm5 = ymm1[0,2,0,2] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm5 = zero,zero,ymm5[0,8],zero,zero,zero,zero,zero,ymm5[1,9],zero,zero,zero,zero,zero,ymm5[18,26],zero,zero,zero,zero,zero,ymm5[19,27],zero,zero,zero,zero,zero,ymm5[20,28] +; AVX512BW-NEXT: vpor %ymm3, %ymm5, %ymm3 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm4 = ymm2[4],zero,zero,zero,zero,zero,zero,ymm2[5],zero,zero,zero,zero,zero,zero,ymm2[6],zero,zero,zero,zero,zero,zero,zero,ymm2[23],zero,zero,zero,zero,zero,zero,ymm2[24],zero,zero +; AVX512BW-NEXT: vpermq {{.*#+}} ymm5 = ymm2[2,3,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm5 = zero,ymm5[4],zero,zero,zero,zero,zero,zero,ymm5[5],zero,zero,zero,zero,zero,zero,ymm5[6],zero,zero,zero,zero,zero,ymm5[23],zero,zero,zero,zero,zero,zero,ymm5[24],zero,zero,zero +; AVX512BW-NEXT: vpor %ymm4, %ymm5, %ymm4 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm5 = ymm2[0,2,0,2] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm5 = ymm5[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u] +; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4 +; AVX512BW-NEXT: vextracti64x4 $1, %zmm2, %ymm5 +; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm6 = [1,1,0,0,1,1,0,0,4,5,1,1,1,1,0,0,2,2,2,4,2,2,2,4,3,3,3,3,2,2,2,4] +; AVX512BW-NEXT: vpermw %zmm5, %zmm6, %zmm6 +; AVX512BW-NEXT: movabsq $4647998506761461824, %rcx # imm = 0x4081020408102040 +; AVX512BW-NEXT: kmovq %rcx, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm6, %zmm4 {%k1} +; AVX512BW-NEXT: movabsq $8133997386832558192, %rcx # imm = 0x70E1C3870E1C3870 +; AVX512BW-NEXT: kmovq %rcx, %k1 +; AVX512BW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1} +; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm4 +; AVX512BW-NEXT: vpunpckhbw {{.*#+}} xmm4 = 
xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[10,u,u,u,u,u,13,12,u,u,u,u,u,15,14,u] +; AVX512BW-NEXT: vpshufhw {{.*#+}} xmm6 = xmm5[0,1,2,3,6,7,7,7] +; AVX512BW-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,1,3,2] ; AVX512BW-NEXT: movw $-32510, %cx # imm = 0x8102 ; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqu8 %xmm5, %xmm3 {%k1} -; AVX512BW-NEXT: vpermq {{.*#+}} ymm5 = ymm4[1,3,2,3] -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm5 = zero,zero,xmm5[6,14],zero,zero,zero,zero,zero,xmm5[7,15],zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpermq {{.*#+}} ymm6 = ymm2[1,3,2,3] -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm6 = zero,zero,zero,zero,xmm6[6,14],zero,zero,zero,zero,zero,xmm6[7,15],zero,zero,zero -; AVX512BW-NEXT: vpor %xmm5, %xmm6, %xmm5 +; AVX512BW-NEXT: vmovdqu8 %xmm6, %xmm4 {%k1} +; AVX512BW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[1,3,2,3] +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm6 = zero,zero,xmm6[6,14],zero,zero,zero,zero,zero,xmm6[7,15],zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpermq {{.*#+}} ymm7 = ymm1[1,3,2,3] +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm7 = zero,zero,zero,zero,xmm7[6,14],zero,zero,zero,zero,zero,xmm7[7,15],zero,zero,zero +; AVX512BW-NEXT: vpor %xmm6, %xmm7, %xmm6 ; AVX512BW-NEXT: movw $-7741, %cx # imm = 0xE1C3 ; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqu8 %xmm3, %xmm5 {%k1} -; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] -; AVX512BW-NEXT: # ymm3 = mem[0,1,0,1] -; AVX512BW-NEXT: vpermw %ymm1, %ymm3, %ymm3 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[1,3,1,3] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm6 = ymm6[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] +; AVX512BW-NEXT: vmovdqu8 %xmm4, %xmm6 {%k1} +; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] +; AVX512BW-NEXT: # ymm4 = mem[0,1,0,1] +; AVX512BW-NEXT: vpermw %ymm5, %ymm4, 
%ymm4 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] ; AVX512BW-NEXT: movl $67637280, %ecx # imm = 0x4081020 ; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqu8 %ymm3, %ymm6 {%k1} -; AVX512BW-NEXT: vpermq {{.*#+}} ymm3 = ymm2[1,3,3,1] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm3 = zero,ymm3[1,9],zero,zero,zero,zero,zero,ymm3[2,10],zero,zero,zero,zero,zero,ymm3[3,19],zero,zero,zero,zero,zero,ymm3[28,20],zero,zero,zero,zero,zero,ymm3[29,21],zero -; AVX512BW-NEXT: vpermq {{.*#+}} ymm7 = ymm4[3,1,1,3] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[1],zero,zero,zero,zero,zero,ymm7[10,2],zero,zero,zero,zero,zero,ymm7[11,3],zero,zero,zero,zero,zero,ymm7[20,28],zero,zero,zero,zero,zero,ymm7[21,29],zero,zero,zero -; AVX512BW-NEXT: vpor %ymm3, %ymm7, %ymm3 +; AVX512BW-NEXT: vmovdqu8 %ymm4, %ymm2 {%k1} +; AVX512BW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[1,9],zero,zero,zero,zero,zero,ymm1[2,10],zero,zero,zero,zero,zero,ymm1[3,19],zero,zero,zero,zero,zero,ymm1[28,20],zero,zero,zero,zero,zero,ymm1[29,21],zero +; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,1,3] +; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,zero,zero,ymm0[10,2],zero,zero,zero,zero,zero,ymm0[11,3],zero,zero,zero,zero,zero,ymm0[20,28],zero,zero,zero,zero,zero,ymm0[21,29],zero,zero,zero +; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: movl $-2029118408, %ecx # imm = 0x870E1C38 ; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqu8 %ymm6, %ymm3 {%k1} -; AVX512BW-NEXT: vinserti32x4 $2, %xmm5, %zmm3, %zmm3 -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm6 = zero,zero,zero,zero,zero,ymm2[5],zero,zero,zero,zero,zero,zero,ymm2[6],zero,zero,zero,zero,zero,zero,zero,ymm2[23],zero,zero,zero,zero,zero,zero,ymm2[24],zero,zero,zero,zero -; AVX512BW-NEXT: vpermq {{.*#+}} ymm7 = ymm2[2,3,0,1] -; AVX512BW-NEXT: vpshufb 
{{.*#+}} ymm7 = zero,zero,zero,zero,zero,zero,ymm7[5],zero,zero,zero,zero,zero,zero,ymm7[6],zero,zero,zero,zero,zero,ymm7[23],zero,zero,zero,zero,zero,zero,ymm7[24],zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpor %ymm6, %ymm7, %ymm6 -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm7 = zero,zero,zero,ymm4[5],zero,zero,zero,zero,zero,zero,ymm4[6],zero,zero,zero,zero,zero,zero,zero,ymm4[23],zero,zero,zero,zero,zero,zero,ymm4[24],zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpermq {{.*#+}} ymm8 = ymm4[2,3,0,1] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm8 = zero,zero,zero,zero,ymm8[5],zero,zero,zero,zero,zero,zero,ymm8[6],zero,zero,zero,zero,zero,ymm8[23],zero,zero,zero,zero,zero,zero,ymm8[24],zero,zero,zero,zero,zero,zero,ymm8[25] -; AVX512BW-NEXT: vpor %ymm7, %ymm8, %ymm7 -; AVX512BW-NEXT: movl $202911840, %ecx # imm = 0xC183060 -; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqu8 %ymm6, %ymm7 {%k1} -; AVX512BW-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,0,2] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[0,8],zero,zero,zero,zero,zero,ymm4[1,9],zero,zero,zero,zero,zero,ymm4[2,10],zero,zero,zero,zero,zero,ymm4[19,27],zero,zero,zero,zero,zero,ymm4[20,28],zero,zero -; AVX512BW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,0,2] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,ymm2[0,8],zero,zero,zero,zero,zero,ymm2[1,9],zero,zero,zero,zero,zero,ymm2[18,26],zero,zero,zero,zero,zero,ymm2[19,27],zero,zero,zero,zero,zero,ymm2[20,28] -; AVX512BW-NEXT: vpor %ymm4, %ymm2, %ymm2 -; AVX512BW-NEXT: vinserti64x4 $1, %ymm7, %zmm2, %zmm2 -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm4 = ymm0[4],zero,zero,zero,zero,zero,zero,ymm0[5],zero,zero,zero,zero,zero,zero,ymm0[6],zero,zero,zero,zero,zero,zero,zero,ymm0[23],zero,zero,zero,zero,zero,zero,ymm0[24],zero,zero -; AVX512BW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[2,3,0,1] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm6 = 
zero,ymm6[4],zero,zero,zero,zero,zero,zero,ymm6[5],zero,zero,zero,zero,zero,zero,ymm6[6],zero,zero,zero,zero,zero,ymm6[23],zero,zero,zero,zero,zero,zero,ymm6[24],zero,zero,zero -; AVX512BW-NEXT: vpor %ymm4, %ymm6, %ymm4 -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,2] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u] -; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm4 = [1,1,0,0,1,1,0,0,4,5,1,1,1,1,0,0,2,2,2,4,2,2,2,4,3,3,3,3,2,2,2,4] -; AVX512BW-NEXT: vpermw %zmm1, %zmm4, %zmm1 -; AVX512BW-NEXT: movabsq $4647998506761461824, %rcx # imm = 0x4081020408102040 -; AVX512BW-NEXT: kmovq %rcx, %k1 -; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} -; AVX512BW-NEXT: movabsq $8133997386832558192, %rcx # imm = 0x70E1C3870E1C3870 -; AVX512BW-NEXT: kmovq %rcx, %k1 -; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1} -; AVX512BW-NEXT: vmovdqa %xmm5, 96(%rax) -; AVX512BW-NEXT: vmovdqa64 %zmm2, (%rax) -; AVX512BW-NEXT: vmovdqa %ymm3, 64(%rax) +; AVX512BW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} +; AVX512BW-NEXT: vmovdqa %ymm0, 64(%rax) +; AVX512BW-NEXT: vmovdqa %xmm6, 96(%rax) +; AVX512BW-NEXT: vmovdqa64 %zmm3, (%rax) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -2417,43 +2412,42 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512BW-FCP-NEXT: movw $-7741, %cx # imm = 0xE1C3 ; AVX512BW-FCP-NEXT: kmovd %ecx, %k1 ; AVX512BW-FCP-NEXT: vmovdqu8 %xmm5, %xmm4 {%k1} -; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] -; AVX512BW-FCP-NEXT: # ymm5 = mem[0,1,0,1] -; AVX512BW-FCP-NEXT: vpermw %ymm6, %ymm5, %ymm5 -; AVX512BW-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm2[1,3,1,3] -; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] -; AVX512BW-FCP-NEXT: movl $67637280, %ecx # imm = 0x4081020 -; AVX512BW-FCP-NEXT: kmovd %ecx, %k1 -; AVX512BW-FCP-NEXT: 
vmovdqu8 %ymm5, %ymm7 {%k1} -; AVX512BW-FCP-NEXT: vpermq {{.*#+}} ymm5 = ymm1[1,3,3,1] -; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} ymm5 = zero,ymm5[1,9],zero,zero,zero,zero,zero,ymm5[2,10],zero,zero,zero,zero,zero,ymm5[3,19],zero,zero,zero,zero,zero,ymm5[28,20],zero,zero,zero,zero,zero,ymm5[29,21],zero -; AVX512BW-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,1,3] -; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,zero,zero,ymm0[10,2],zero,zero,zero,zero,zero,ymm0[11,3],zero,zero,zero,zero,zero,ymm0[20,28],zero,zero,zero,zero,zero,ymm0[21,29],zero,zero,zero -; AVX512BW-FCP-NEXT: vpor %ymm5, %ymm0, %ymm0 -; AVX512BW-FCP-NEXT: movl $-2029118408, %ecx # imm = 0x870E1C38 -; AVX512BW-FCP-NEXT: kmovd %ecx, %k1 -; AVX512BW-FCP-NEXT: vmovdqu8 %ymm7, %ymm0 {%k1} -; AVX512BW-FCP-NEXT: vinserti32x4 $2, %xmm4, %zmm0, %zmm0 ; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} zmm5 = [1,1,0,0,1,1,0,0,4,5,1,1,1,1,0,0,2,2,2,4,2,2,2,4,3,3,3,3,2,2,2,4] ; AVX512BW-FCP-NEXT: vpermw %zmm6, %zmm5, %zmm5 -; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} zmm6 = [0,0,4,0,0,0,4,0,1,5,0,0,1,5,2,6] -; AVX512BW-FCP-NEXT: vpermd %zmm2, %zmm6, %zmm2 -; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm2 = zmm2[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u,32,36,u,u,u,u,u,33,37,u,u,u,u,u,34,38,u,u,u,u,u,51,55,u,u,u,u,u,56,60,u,u] +; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} zmm7 = [0,0,4,0,0,0,4,0,1,5,0,0,1,5,2,6] +; AVX512BW-FCP-NEXT: vpermd %zmm2, %zmm7, %zmm7 +; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm7 = zmm7[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u,32,36,u,u,u,u,u,33,37,u,u,u,u,u,34,38,u,u,u,u,u,51,55,u,u,u,u,u,56,60,u,u] ; AVX512BW-FCP-NEXT: movabsq $4647998506761461824, %rcx # imm = 0x4081020408102040 ; AVX512BW-FCP-NEXT: kmovq %rcx, %k1 -; AVX512BW-FCP-NEXT: vmovdqu8 %zmm5, %zmm2 {%k1} +; AVX512BW-FCP-NEXT: vmovdqu8 %zmm5, %zmm7 {%k1} ; AVX512BW-FCP-NEXT: vpmovsxbd {{.*#+}} zmm5 = [0,0,4,0,0,1,4,5,1,5,0,0,1,5,2,6] ; AVX512BW-FCP-NEXT: vpermd %zmm3, %zmm5, %zmm3 ; 
AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm3 = zmm3[0,8],zero,zero,zero,zero,zero,zmm3[1,9],zero,zero,zero,zero,zero,zmm3[2,10],zero,zero,zero,zero,zero,zmm3[19,27],zero,zero,zero,zero,zero,zmm3[20,28],zero,zero,zero,zero,zero,zmm3[33,37],zero,zero,zero,zero,zero,zmm3[34,38],zero,zero,zero,zero,zero,zmm3[51,55],zero,zero,zero,zero,zero,zmm3[56,60],zero,zero,zero,zero,zero,zmm3[57] -; AVX512BW-FCP-NEXT: vpermd %zmm1, %zmm5, %zmm1 -; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zero,zero,zmm1[33,37],zero,zero,zero,zero,zero,zmm1[34,38],zero,zero,zero,zero,zero,zmm1[51,55],zero,zero,zero,zero,zero,zmm1[56,60],zero,zero,zero,zero -; AVX512BW-FCP-NEXT: vporq %zmm3, %zmm1, %zmm1 +; AVX512BW-FCP-NEXT: vpermd %zmm1, %zmm5, %zmm5 +; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} zmm5 = zero,zero,zmm5[0,8],zero,zero,zero,zero,zero,zmm5[1,9],zero,zero,zero,zero,zero,zmm5[18,26],zero,zero,zero,zero,zero,zmm5[19,27],zero,zero,zero,zero,zero,zmm5[20,28],zero,zero,zero,zero,zero,zmm5[33,37],zero,zero,zero,zero,zero,zmm5[34,38],zero,zero,zero,zero,zero,zmm5[51,55],zero,zero,zero,zero,zero,zmm5[56,60],zero,zero,zero,zero +; AVX512BW-FCP-NEXT: vporq %zmm3, %zmm5, %zmm3 ; AVX512BW-FCP-NEXT: movabsq $8133997386832558192, %rcx # imm = 0x70E1C3870E1C3870 ; AVX512BW-FCP-NEXT: kmovq %rcx, %k1 -; AVX512BW-FCP-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} -; AVX512BW-FCP-NEXT: vmovdqa64 %zmm1, (%rax) -; AVX512BW-FCP-NEXT: vmovdqa %xmm4, 96(%rax) +; AVX512BW-FCP-NEXT: vmovdqu8 %zmm7, %zmm3 {%k1} +; AVX512BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] +; AVX512BW-FCP-NEXT: # ymm5 = mem[0,1,0,1] +; AVX512BW-FCP-NEXT: vpermw %ymm6, %ymm5, %ymm5 +; AVX512BW-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] +; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} ymm2 = 
ymm2[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] +; AVX512BW-FCP-NEXT: movl $67637280, %ecx # imm = 0x4081020 +; AVX512BW-FCP-NEXT: kmovd %ecx, %k1 +; AVX512BW-FCP-NEXT: vmovdqu8 %ymm5, %ymm2 {%k1} +; AVX512BW-FCP-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,1] +; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[1,9],zero,zero,zero,zero,zero,ymm1[2,10],zero,zero,zero,zero,zero,ymm1[3,19],zero,zero,zero,zero,zero,ymm1[28,20],zero,zero,zero,zero,zero,ymm1[29,21],zero +; AVX512BW-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,1,3] +; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,zero,zero,ymm0[10,2],zero,zero,zero,zero,zero,ymm0[11,3],zero,zero,zero,zero,zero,ymm0[20,28],zero,zero,zero,zero,zero,ymm0[21,29],zero,zero,zero +; AVX512BW-FCP-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512BW-FCP-NEXT: movl $-2029118408, %ecx # imm = 0x870E1C38 +; AVX512BW-FCP-NEXT: kmovd %ecx, %k1 +; AVX512BW-FCP-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} ; AVX512BW-FCP-NEXT: vmovdqa %ymm0, 64(%rax) +; AVX512BW-FCP-NEXT: vmovdqa64 %zmm3, (%rax) +; AVX512BW-FCP-NEXT: vmovdqa %xmm4, 96(%rax) ; AVX512BW-FCP-NEXT: vzeroupper ; AVX512BW-FCP-NEXT: retq ; @@ -2463,80 +2457,79 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-BW-NEXT: movq {{[0-9]+}}(%rsp), %r10 ; AVX512DQ-BW-NEXT: vmovdqa (%rdi), %xmm0 ; AVX512DQ-BW-NEXT: vmovdqa (%rdx), %xmm1 -; AVX512DQ-BW-NEXT: vmovdqa (%r8), %xmm3 -; AVX512DQ-BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm2 -; AVX512DQ-BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm4 -; AVX512DQ-BW-NEXT: vinserti128 $1, (%r9), %ymm3, %ymm0 -; AVX512DQ-BW-NEXT: vinserti32x4 $2, (%r10), %zmm0, %zmm0 -; AVX512DQ-BW-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512DQ-BW-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm3 = 
xmm1[10,u,u,u,u,u,13,12,u,u,u,u,u,15,14,u] -; AVX512DQ-BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512DQ-BW-NEXT: vpshufhw {{.*#+}} xmm5 = xmm1[0,1,2,3,6,7,7,7] -; AVX512DQ-BW-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,1,3,2] +; AVX512DQ-BW-NEXT: vmovdqa (%r8), %xmm2 +; AVX512DQ-BW-NEXT: vinserti128 $1, (%rsi), %ymm0, %ymm0 +; AVX512DQ-BW-NEXT: vinserti128 $1, (%rcx), %ymm1, %ymm1 +; AVX512DQ-BW-NEXT: vinserti128 $1, (%r9), %ymm2, %ymm2 +; AVX512DQ-BW-NEXT: vinserti32x4 $2, (%r10), %zmm2, %zmm2 +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,zero,zero,zero,ymm1[5],zero,zero,zero,zero,zero,zero,ymm1[6],zero,zero,zero,zero,zero,zero,zero,ymm1[23],zero,zero,zero,zero,zero,zero,ymm1[24],zero,zero,zero,zero +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm4 = ymm1[2,3,0,1] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm4 = zero,zero,zero,zero,zero,zero,ymm4[5],zero,zero,zero,zero,zero,zero,ymm4[6],zero,zero,zero,zero,zero,ymm4[23],zero,zero,zero,zero,zero,zero,ymm4[24],zero,zero,zero,zero,zero +; AVX512DQ-BW-NEXT: vpor %ymm3, %ymm4, %ymm3 +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm4 = zero,zero,zero,ymm0[5],zero,zero,zero,zero,zero,zero,ymm0[6],zero,zero,zero,zero,zero,zero,zero,ymm0[23],zero,zero,zero,zero,zero,zero,ymm0[24],zero,zero,zero,zero,zero,zero +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm5 = ymm0[2,3,0,1] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm5 = zero,zero,zero,zero,ymm5[5],zero,zero,zero,zero,zero,zero,ymm5[6],zero,zero,zero,zero,zero,ymm5[23],zero,zero,zero,zero,zero,zero,ymm5[24],zero,zero,zero,zero,zero,zero,ymm5[25] +; AVX512DQ-BW-NEXT: vpor %ymm5, %ymm4, %ymm4 +; AVX512DQ-BW-NEXT: movl $202911840, %ecx # imm = 0xC183060 +; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 +; AVX512DQ-BW-NEXT: vmovdqu8 %ymm3, %ymm4 {%k1} +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm3 = ymm0[0,2,0,2] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,8],zero,zero,zero,zero,zero,ymm3[1,9],zero,zero,zero,zero,zero,ymm3[2,10],zero,zero,zero,zero,zero,ymm3[19,27],zero,zero,zero,zero,zero,ymm3[20,28],zero,zero 
+; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm5 = ymm1[0,2,0,2] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm5 = zero,zero,ymm5[0,8],zero,zero,zero,zero,zero,ymm5[1,9],zero,zero,zero,zero,zero,ymm5[18,26],zero,zero,zero,zero,zero,ymm5[19,27],zero,zero,zero,zero,zero,ymm5[20,28] +; AVX512DQ-BW-NEXT: vpor %ymm3, %ymm5, %ymm3 +; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm4 = ymm2[4],zero,zero,zero,zero,zero,zero,ymm2[5],zero,zero,zero,zero,zero,zero,ymm2[6],zero,zero,zero,zero,zero,zero,zero,ymm2[23],zero,zero,zero,zero,zero,zero,ymm2[24],zero,zero +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm5 = ymm2[2,3,0,1] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm5 = zero,ymm5[4],zero,zero,zero,zero,zero,zero,ymm5[5],zero,zero,zero,zero,zero,zero,ymm5[6],zero,zero,zero,zero,zero,ymm5[23],zero,zero,zero,zero,zero,zero,ymm5[24],zero,zero,zero +; AVX512DQ-BW-NEXT: vpor %ymm4, %ymm5, %ymm4 +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm5 = ymm2[0,2,0,2] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm5 = ymm5[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u] +; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm4, %zmm5, %zmm4 +; AVX512DQ-BW-NEXT: vextracti64x4 $1, %zmm2, %ymm5 +; AVX512DQ-BW-NEXT: vpmovsxbw {{.*#+}} zmm6 = [1,1,0,0,1,1,0,0,4,5,1,1,1,1,0,0,2,2,2,4,2,2,2,4,3,3,3,3,2,2,2,4] +; AVX512DQ-BW-NEXT: vpermw %zmm5, %zmm6, %zmm6 +; AVX512DQ-BW-NEXT: movabsq $4647998506761461824, %rcx # imm = 0x4081020408102040 +; AVX512DQ-BW-NEXT: kmovq %rcx, %k1 +; AVX512DQ-BW-NEXT: vmovdqu8 %zmm6, %zmm4 {%k1} +; AVX512DQ-BW-NEXT: movabsq $8133997386832558192, %rcx # imm = 0x70E1C3870E1C3870 +; AVX512DQ-BW-NEXT: kmovq %rcx, %k1 +; AVX512DQ-BW-NEXT: vmovdqu8 %zmm4, %zmm3 {%k1} +; AVX512DQ-BW-NEXT: vextracti128 $1, %ymm2, %xmm4 +; AVX512DQ-BW-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm4 = 
xmm4[10,u,u,u,u,u,13,12,u,u,u,u,u,15,14,u] +; AVX512DQ-BW-NEXT: vpshufhw {{.*#+}} xmm6 = xmm5[0,1,2,3,6,7,7,7] +; AVX512DQ-BW-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,1,3,2] ; AVX512DQ-BW-NEXT: movw $-32510, %cx # imm = 0x8102 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu8 %xmm5, %xmm3 {%k1} -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm5 = ymm4[1,3,2,3] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm5 = zero,zero,xmm5[6,14],zero,zero,zero,zero,zero,xmm5[7,15],zero,zero,zero,zero,zero -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm6 = ymm2[1,3,2,3] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm6 = zero,zero,zero,zero,xmm6[6,14],zero,zero,zero,zero,zero,xmm6[7,15],zero,zero,zero -; AVX512DQ-BW-NEXT: vpor %xmm5, %xmm6, %xmm5 +; AVX512DQ-BW-NEXT: vmovdqu8 %xmm6, %xmm4 {%k1} +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[1,3,2,3] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm6 = zero,zero,xmm6[6,14],zero,zero,zero,zero,zero,xmm6[7,15],zero,zero,zero,zero,zero +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm7 = ymm1[1,3,2,3] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm7 = zero,zero,zero,zero,xmm7[6,14],zero,zero,zero,zero,zero,xmm7[7,15],zero,zero,zero +; AVX512DQ-BW-NEXT: vpor %xmm6, %xmm7, %xmm6 ; AVX512DQ-BW-NEXT: movw $-7741, %cx # imm = 0xE1C3 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu8 %xmm3, %xmm5 {%k1} -; AVX512DQ-BW-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] -; AVX512DQ-BW-NEXT: # ymm3 = mem[0,1,0,1] -; AVX512DQ-BW-NEXT: vpermw %ymm1, %ymm3, %ymm3 -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[1,3,1,3] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm6 = ymm6[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] +; AVX512DQ-BW-NEXT: vmovdqu8 %xmm4, %xmm6 {%k1} +; AVX512DQ-BW-NEXT: vbroadcasti128 {{.*#+}} ymm4 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] +; AVX512DQ-BW-NEXT: # ymm4 = mem[0,1,0,1] +; AVX512DQ-BW-NEXT: vpermw %ymm5, %ymm4, %ymm4 +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] +; AVX512DQ-BW-NEXT: vpshufb 
{{.*#+}} ymm2 = ymm2[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] ; AVX512DQ-BW-NEXT: movl $67637280, %ecx # imm = 0x4081020 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu8 %ymm3, %ymm6 {%k1} -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm3 = ymm2[1,3,3,1] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm3 = zero,ymm3[1,9],zero,zero,zero,zero,zero,ymm3[2,10],zero,zero,zero,zero,zero,ymm3[3,19],zero,zero,zero,zero,zero,ymm3[28,20],zero,zero,zero,zero,zero,ymm3[29,21],zero -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm7 = ymm4[3,1,1,3] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[1],zero,zero,zero,zero,zero,ymm7[10,2],zero,zero,zero,zero,zero,ymm7[11,3],zero,zero,zero,zero,zero,ymm7[20,28],zero,zero,zero,zero,zero,ymm7[21,29],zero,zero,zero -; AVX512DQ-BW-NEXT: vpor %ymm3, %ymm7, %ymm3 +; AVX512DQ-BW-NEXT: vmovdqu8 %ymm4, %ymm2 {%k1} +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,1] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[1,9],zero,zero,zero,zero,zero,ymm1[2,10],zero,zero,zero,zero,zero,ymm1[3,19],zero,zero,zero,zero,zero,ymm1[28,20],zero,zero,zero,zero,zero,ymm1[29,21],zero +; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,1,3] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,zero,zero,ymm0[10,2],zero,zero,zero,zero,zero,ymm0[11,3],zero,zero,zero,zero,zero,ymm0[20,28],zero,zero,zero,zero,zero,ymm0[21,29],zero,zero,zero +; AVX512DQ-BW-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512DQ-BW-NEXT: movl $-2029118408, %ecx # imm = 0x870E1C38 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu8 %ymm6, %ymm3 {%k1} -; AVX512DQ-BW-NEXT: vinserti32x4 $2, %xmm5, %zmm3, %zmm3 -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm6 = zero,zero,zero,zero,zero,ymm2[5],zero,zero,zero,zero,zero,zero,ymm2[6],zero,zero,zero,zero,zero,zero,zero,ymm2[23],zero,zero,zero,zero,zero,zero,ymm2[24],zero,zero,zero,zero -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm7 = ymm2[2,3,0,1] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm7 = 
zero,zero,zero,zero,zero,zero,ymm7[5],zero,zero,zero,zero,zero,zero,ymm7[6],zero,zero,zero,zero,zero,ymm7[23],zero,zero,zero,zero,zero,zero,ymm7[24],zero,zero,zero,zero,zero -; AVX512DQ-BW-NEXT: vpor %ymm6, %ymm7, %ymm6 -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm7 = zero,zero,zero,ymm4[5],zero,zero,zero,zero,zero,zero,ymm4[6],zero,zero,zero,zero,zero,zero,zero,ymm4[23],zero,zero,zero,zero,zero,zero,ymm4[24],zero,zero,zero,zero,zero,zero -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm8 = ymm4[2,3,0,1] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm8 = zero,zero,zero,zero,ymm8[5],zero,zero,zero,zero,zero,zero,ymm8[6],zero,zero,zero,zero,zero,ymm8[23],zero,zero,zero,zero,zero,zero,ymm8[24],zero,zero,zero,zero,zero,zero,ymm8[25] -; AVX512DQ-BW-NEXT: vpor %ymm7, %ymm8, %ymm7 -; AVX512DQ-BW-NEXT: movl $202911840, %ecx # imm = 0xC183060 -; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu8 %ymm6, %ymm7 {%k1} -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm4 = ymm4[0,2,0,2] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm4 = ymm4[0,8],zero,zero,zero,zero,zero,ymm4[1,9],zero,zero,zero,zero,zero,ymm4[2,10],zero,zero,zero,zero,zero,ymm4[19,27],zero,zero,zero,zero,zero,ymm4[20,28],zero,zero -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,0,2] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,ymm2[0,8],zero,zero,zero,zero,zero,ymm2[1,9],zero,zero,zero,zero,zero,ymm2[18,26],zero,zero,zero,zero,zero,ymm2[19,27],zero,zero,zero,zero,zero,ymm2[20,28] -; AVX512DQ-BW-NEXT: vpor %ymm4, %ymm2, %ymm2 -; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm7, %zmm2, %zmm2 -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm4 = ymm0[4],zero,zero,zero,zero,zero,zero,ymm0[5],zero,zero,zero,zero,zero,zero,ymm0[6],zero,zero,zero,zero,zero,zero,zero,ymm0[23],zero,zero,zero,zero,zero,zero,ymm0[24],zero,zero -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[2,3,0,1] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm6 = 
zero,ymm6[4],zero,zero,zero,zero,zero,zero,ymm6[5],zero,zero,zero,zero,zero,zero,ymm6[6],zero,zero,zero,zero,zero,ymm6[23],zero,zero,zero,zero,zero,zero,ymm6[24],zero,zero,zero -; AVX512DQ-BW-NEXT: vpor %ymm4, %ymm6, %ymm4 -; AVX512DQ-BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,2] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u] -; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 -; AVX512DQ-BW-NEXT: vpmovsxbw {{.*#+}} zmm4 = [1,1,0,0,1,1,0,0,4,5,1,1,1,1,0,0,2,2,2,4,2,2,2,4,3,3,3,3,2,2,2,4] -; AVX512DQ-BW-NEXT: vpermw %zmm1, %zmm4, %zmm1 -; AVX512DQ-BW-NEXT: movabsq $4647998506761461824, %rcx # imm = 0x4081020408102040 -; AVX512DQ-BW-NEXT: kmovq %rcx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} -; AVX512DQ-BW-NEXT: movabsq $8133997386832558192, %rcx # imm = 0x70E1C3870E1C3870 -; AVX512DQ-BW-NEXT: kmovq %rcx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1} -; AVX512DQ-BW-NEXT: vmovdqa %xmm5, 96(%rax) -; AVX512DQ-BW-NEXT: vmovdqa64 %zmm2, (%rax) -; AVX512DQ-BW-NEXT: vmovdqa %ymm3, 64(%rax) +; AVX512DQ-BW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} +; AVX512DQ-BW-NEXT: vmovdqa %ymm0, 64(%rax) +; AVX512DQ-BW-NEXT: vmovdqa %xmm6, 96(%rax) +; AVX512DQ-BW-NEXT: vmovdqa64 %zmm3, (%rax) ; AVX512DQ-BW-NEXT: vzeroupper ; AVX512DQ-BW-NEXT: retq ; @@ -2566,43 +2559,42 @@ define void @store_i8_stride7_vf16(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-BW-FCP-NEXT: movw $-7741, %cx # imm = 0xE1C3 ; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k1 ; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %xmm5, %xmm4 {%k1} -; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] -; AVX512DQ-BW-FCP-NEXT: # ymm5 = mem[0,1,0,1] -; AVX512DQ-BW-FCP-NEXT: vpermw %ymm6, %ymm5, %ymm5 -; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} ymm7 = ymm2[1,3,1,3] -; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} ymm7 = ymm7[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] -; AVX512DQ-BW-FCP-NEXT: movl 
$67637280, %ecx # imm = 0x4081020 -; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %ymm5, %ymm7 {%k1} -; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} ymm5 = ymm1[1,3,3,1] -; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} ymm5 = zero,ymm5[1,9],zero,zero,zero,zero,zero,ymm5[2,10],zero,zero,zero,zero,zero,ymm5[3,19],zero,zero,zero,zero,zero,ymm5[28,20],zero,zero,zero,zero,zero,ymm5[29,21],zero -; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,1,3] -; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,zero,zero,ymm0[10,2],zero,zero,zero,zero,zero,ymm0[11,3],zero,zero,zero,zero,zero,ymm0[20,28],zero,zero,zero,zero,zero,ymm0[21,29],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vpor %ymm5, %ymm0, %ymm0 -; AVX512DQ-BW-FCP-NEXT: movl $-2029118408, %ecx # imm = 0x870E1C38 -; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %ymm7, %ymm0 {%k1} -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $2, %xmm4, %zmm0, %zmm0 ; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} zmm5 = [1,1,0,0,1,1,0,0,4,5,1,1,1,1,0,0,2,2,2,4,2,2,2,4,3,3,3,3,2,2,2,4] ; AVX512DQ-BW-FCP-NEXT: vpermw %zmm6, %zmm5, %zmm5 -; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} zmm6 = [0,0,4,0,0,0,4,0,1,5,0,0,1,5,2,6] -; AVX512DQ-BW-FCP-NEXT: vpermd %zmm2, %zmm6, %zmm2 -; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm2 = zmm2[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u,32,36,u,u,u,u,u,33,37,u,u,u,u,u,34,38,u,u,u,u,u,51,55,u,u,u,u,u,56,60,u,u] +; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} zmm7 = [0,0,4,0,0,0,4,0,1,5,0,0,1,5,2,6] +; AVX512DQ-BW-FCP-NEXT: vpermd %zmm2, %zmm7, %zmm7 +; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm7 = zmm7[u,u,u,u,0,8,u,u,u,u,u,1,9,u,u,u,u,u,18,26,u,u,u,u,u,19,27,u,u,u,u,u,32,36,u,u,u,u,u,33,37,u,u,u,u,u,34,38,u,u,u,u,u,51,55,u,u,u,u,u,56,60,u,u] ; AVX512DQ-BW-FCP-NEXT: movabsq $4647998506761461824, %rcx # imm = 0x4081020408102040 ; AVX512DQ-BW-FCP-NEXT: kmovq %rcx, %k1 -; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %zmm5, %zmm2 {%k1} +; AVX512DQ-BW-FCP-NEXT: 
vmovdqu8 %zmm5, %zmm7 {%k1} ; AVX512DQ-BW-FCP-NEXT: vpmovsxbd {{.*#+}} zmm5 = [0,0,4,0,0,1,4,5,1,5,0,0,1,5,2,6] ; AVX512DQ-BW-FCP-NEXT: vpermd %zmm3, %zmm5, %zmm3 ; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm3 = zmm3[0,8],zero,zero,zero,zero,zero,zmm3[1,9],zero,zero,zero,zero,zero,zmm3[2,10],zero,zero,zero,zero,zero,zmm3[19,27],zero,zero,zero,zero,zero,zmm3[20,28],zero,zero,zero,zero,zero,zmm3[33,37],zero,zero,zero,zero,zero,zmm3[34,38],zero,zero,zero,zero,zero,zmm3[51,55],zero,zero,zero,zero,zero,zmm3[56,60],zero,zero,zero,zero,zero,zmm3[57] -; AVX512DQ-BW-FCP-NEXT: vpermd %zmm1, %zmm5, %zmm1 -; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm1 = zero,zero,zmm1[0,8],zero,zero,zero,zero,zero,zmm1[1,9],zero,zero,zero,zero,zero,zmm1[18,26],zero,zero,zero,zero,zero,zmm1[19,27],zero,zero,zero,zero,zero,zmm1[20,28],zero,zero,zero,zero,zero,zmm1[33,37],zero,zero,zero,zero,zero,zmm1[34,38],zero,zero,zero,zero,zero,zmm1[51,55],zero,zero,zero,zero,zero,zmm1[56,60],zero,zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vporq %zmm3, %zmm1, %zmm1 +; AVX512DQ-BW-FCP-NEXT: vpermd %zmm1, %zmm5, %zmm5 +; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} zmm5 = zero,zero,zmm5[0,8],zero,zero,zero,zero,zero,zmm5[1,9],zero,zero,zero,zero,zero,zmm5[18,26],zero,zero,zero,zero,zero,zmm5[19,27],zero,zero,zero,zero,zero,zmm5[20,28],zero,zero,zero,zero,zero,zmm5[33,37],zero,zero,zero,zero,zero,zmm5[34,38],zero,zero,zero,zero,zero,zmm5[51,55],zero,zero,zero,zero,zero,zmm5[56,60],zero,zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vporq %zmm3, %zmm5, %zmm3 ; AVX512DQ-BW-FCP-NEXT: movabsq $8133997386832558192, %rcx # imm = 0x70E1C3870E1C3870 ; AVX512DQ-BW-FCP-NEXT: kmovq %rcx, %k1 -; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1} -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm1, (%rax) -; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm4, 96(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %zmm7, %zmm3 {%k1} +; AVX512DQ-BW-FCP-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [4,5,4,5,5,6,5,6,4,5,4,5,5,6,5,6] +; AVX512DQ-BW-FCP-NEXT: # ymm5 = mem[0,1,0,1] +; 
AVX512DQ-BW-FCP-NEXT: vpermw %ymm6, %ymm5, %ymm5 +; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,3,1,3] +; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,1,9,u,u,u,u,u,2,10,u,u,u,u,u,19,27,u,u,u,u,u,20,28,u,u,u,u,u,21] +; AVX512DQ-BW-FCP-NEXT: movl $67637280, %ecx # imm = 0x4081020 +; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k1 +; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %ymm5, %ymm2 {%k1} +; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,1] +; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} ymm1 = zero,ymm1[1,9],zero,zero,zero,zero,zero,ymm1[2,10],zero,zero,zero,zero,zero,ymm1[3,19],zero,zero,zero,zero,zero,ymm1[28,20],zero,zero,zero,zero,zero,ymm1[29,21],zero +; AVX512DQ-BW-FCP-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,1,3] +; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,zero,zero,ymm0[10,2],zero,zero,zero,zero,zero,ymm0[11,3],zero,zero,zero,zero,zero,ymm0[20,28],zero,zero,zero,zero,zero,ymm0[21,29],zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512DQ-BW-FCP-NEXT: movl $-2029118408, %ecx # imm = 0x870E1C38 +; AVX512DQ-BW-FCP-NEXT: kmovd %ecx, %k1 +; AVX512DQ-BW-FCP-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} ; AVX512DQ-BW-FCP-NEXT: vmovdqa %ymm0, 64(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm3, (%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm4, 96(%rax) ; AVX512DQ-BW-FCP-NEXT: vzeroupper ; AVX512DQ-BW-FCP-NEXT: retq %in.vec0 = load <16 x i8>, ptr %in.vecptr0, align 64 diff --git a/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll b/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll index b5d914153ffd3..bfb9c43b3fd16 100644 --- a/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll +++ b/llvm/test/CodeGen/X86/win-catchpad-nested-cxx.ll @@ -48,14 +48,14 @@ handler2: ; X64-LABEL: $cppxdata$try_in_catch: ; CHECK-NEXT: .long 429065506 ; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long ($stateUnwindMap$try_in_catch) +; CHECK-NEXT: .long $stateUnwindMap$try_in_catch ; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long ($tryMap$try_in_catch) +; CHECK-NEXT: .long 
$tryMap$try_in_catch ; ip2state num + ptr ; X86-NEXT: .long 0 ; X86-NEXT: .long 0 ; X64-NEXT: .long 7 -; X64-NEXT: .long ($ip2state$try_in_catch) +; X64-NEXT: .long $ip2state$try_in_catch ; unwindhelp offset ; X64-NEXT: .long 40 ; CHECK-NEXT: .long 0 @@ -67,24 +67,24 @@ handler2: ; X86-NEXT: .long 2 ; X86-NEXT: .long 3 ; X86-NEXT: .long 1 -; X86-NEXT: .long ($handlerMap$0$try_in_catch) +; X86-NEXT: .long $handlerMap$0$try_in_catch ; X86-NEXT: .long 0 ; X86-NEXT: .long 0 ; X86-NEXT: .long 3 ; X86-NEXT: .long 1 -; X86-NEXT: .long ($handlerMap$1$try_in_catch) +; X86-NEXT: .long $handlerMap$1$try_in_catch ; X64-LABEL: $tryMap$try_in_catch: ; X64-NEXT: .long 0 ; X64-NEXT: .long 0 ; X64-NEXT: .long 3 ; X64-NEXT: .long 1 -; X64-NEXT: .long ($handlerMap$0$try_in_catch) +; X64-NEXT: .long $handlerMap$0$try_in_catch ; X64-NEXT: .long 2 ; X64-NEXT: .long 2 ; X64-NEXT: .long 3 ; X64-NEXT: .long 1 -; X64-NEXT: .long ($handlerMap$1$try_in_catch) +; X64-NEXT: .long $handlerMap$1$try_in_catch ; CHECK: $handlerMap$0$try_in_catch: ; CHECK-NEXT: .long 64 diff --git a/llvm/test/CodeGen/X86/win-catchpad.ll b/llvm/test/CodeGen/X86/win-catchpad.ll index ceca37710e9ec..249194610e9f8 100644 --- a/llvm/test/CodeGen/X86/win-catchpad.ll +++ b/llvm/test/CodeGen/X86/win-catchpad.ll @@ -183,11 +183,11 @@ try.cont: ; X64: $cppxdata$try_catch_catch: ; X64-NEXT: .long 429065506 ; X64-NEXT: .long 2 -; X64-NEXT: .long ($stateUnwindMap$try_catch_catch)@IMGREL +; X64-NEXT: .long $stateUnwindMap$try_catch_catch@IMGREL ; X64-NEXT: .long 1 -; X64-NEXT: .long ($tryMap$try_catch_catch)@IMGREL +; X64-NEXT: .long $tryMap$try_catch_catch@IMGREL ; X64-NEXT: .long 5 -; X64-NEXT: .long ($ip2state$try_catch_catch)@IMGREL +; X64-NEXT: .long $ip2state$try_catch_catch@IMGREL ; X64-NEXT: .long 48 ; X64-NEXT: .long 0 ; X64-NEXT: .long 1 @@ -197,7 +197,7 @@ try.cont: ; X64-NEXT: .long 0 ; X64-NEXT: .long 1 ; X64-NEXT: .long 2 -; X64-NEXT: .long ($handlerMap$0$try_catch_catch)@IMGREL +; X64-NEXT: .long 
$handlerMap$0$try_catch_catch@IMGREL ; X64: $handlerMap$0$try_catch_catch: ; X64-NEXT: .long 0 @@ -325,11 +325,11 @@ try.cont: ; X64-LABEL: $cppxdata$branch_to_normal_dest: ; X64-NEXT: .long 429065506 ; X64-NEXT: .long 2 -; X64-NEXT: .long ($stateUnwindMap$branch_to_normal_dest)@IMGREL +; X64-NEXT: .long $stateUnwindMap$branch_to_normal_dest@IMGREL ; X64-NEXT: .long 1 -; X64-NEXT: .long ($tryMap$branch_to_normal_dest)@IMGREL +; X64-NEXT: .long $tryMap$branch_to_normal_dest@IMGREL ; X64-NEXT: .long 4 -; X64-NEXT: .long ($ip2state$branch_to_normal_dest)@IMGREL +; X64-NEXT: .long $ip2state$branch_to_normal_dest@IMGREL ; X64-NEXT: .long 40 ; X64-NEXT: .long 0 ; X64-NEXT: .long 1 @@ -345,7 +345,7 @@ try.cont: ; X64-NEXT: .long 0 ; X64-NEXT: .long 1 ; X64-NEXT: .long 1 -; X64-NEXT: .long ($handlerMap$0$branch_to_normal_dest)@IMGREL +; X64-NEXT: .long $handlerMap$0$branch_to_normal_dest@IMGREL ; X64-LABEL: $handlerMap$0$branch_to_normal_dest: ; X64-NEXT: .long 64 diff --git a/llvm/test/CodeGen/X86/win-cleanuppad.ll b/llvm/test/CodeGen/X86/win-cleanuppad.ll index 452f0a8e36d8d..e3f7f5be0049e 100644 --- a/llvm/test/CodeGen/X86/win-cleanuppad.ll +++ b/llvm/test/CodeGen/X86/win-cleanuppad.ll @@ -39,11 +39,11 @@ ehcleanup: ; preds = %entry ; CHECK: $cppxdata$simple_cleanup: ; CHECK-NEXT: .long 429065506 ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long ($stateUnwindMap$simple_cleanup)@IMGREL +; CHECK-NEXT: .long $stateUnwindMap$simple_cleanup@IMGREL ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long ($ip2state$simple_cleanup)@IMGREL +; CHECK-NEXT: .long $ip2state$simple_cleanup@IMGREL ; UnwindHelp offset should match the -2 store above ; CHECK-NEXT: .long 40 ; CHECK-NEXT: .long 0 @@ -114,7 +114,7 @@ cleanup.outer: ; preds = %invoke.cont.1, %c ; X86: L__ehtable$nested_cleanup: ; X86: .long 429065506 ; X86: .long 2 -; X86: .long ($stateUnwindMap$nested_cleanup) +; X86: .long $stateUnwindMap$nested_cleanup ; X86: .long 0 ; X86: .long 0 ; X86: .long 0 
@@ -167,11 +167,11 @@ cleanup.outer: ; preds = %invoke.cont.1, %c ; X64: $cppxdata$nested_cleanup: ; X64-NEXT: .long 429065506 ; X64-NEXT: .long 2 -; X64-NEXT: .long ($stateUnwindMap$nested_cleanup)@IMGREL +; X64-NEXT: .long $stateUnwindMap$nested_cleanup@IMGREL ; X64-NEXT: .long 0 ; X64-NEXT: .long 0 ; X64-NEXT: .long 5 -; X64-NEXT: .long ($ip2state$nested_cleanup)@IMGREL +; X64-NEXT: .long $ip2state$nested_cleanup@IMGREL ; X64-NEXT: .long 56 ; X64-NEXT: .long 0 ; X64-NEXT: .long 1 diff --git a/llvm/test/CodeGen/X86/win-funclet-cfi.ll b/llvm/test/CodeGen/X86/win-funclet-cfi.ll index f9a1e2f0d2880..96b55772e05e4 100644 --- a/llvm/test/CodeGen/X86/win-funclet-cfi.ll +++ b/llvm/test/CodeGen/X86/win-funclet-cfi.ll @@ -61,7 +61,7 @@ declare i32 @__CxxFrameHandler3(...) ; Don't emit a reference to the LSDA. ; CHECK: .seh_handlerdata -; CHECK-NOT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL +; CHECK-NOT: .long "$cppxdata$?f@@YAXXZ"@IMGREL ; CHECK-NEXT: .text ; CHECK: .seh_endproc @@ -92,6 +92,6 @@ declare i32 @__CxxFrameHandler3(...) ; Emit a reference to the LSDA. 
; CHECK: .seh_handlerdata -; CHECK-NEXT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL +; CHECK-NEXT: .long "$cppxdata$?f@@YAXXZ"@IMGREL ; CHECK-NEXT: .text ; CHECK: .seh_endproc diff --git a/llvm/test/CodeGen/X86/win32-eh.ll b/llvm/test/CodeGen/X86/win32-eh.ll index d3d19ede546d6..857df9882be47 100644 --- a/llvm/test/CodeGen/X86/win32-eh.ll +++ b/llvm/test/CodeGen/X86/win32-eh.ll @@ -201,9 +201,9 @@ catch: ; CHECK-LABEL: L__ehtable$use_CxxFrameHandler3: ; CHECK-NEXT: .long 429065506 ; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long ($stateUnwindMap$use_CxxFrameHandler3) +; CHECK-NEXT: .long $stateUnwindMap$use_CxxFrameHandler3 ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long ($tryMap$use_CxxFrameHandler3) +; CHECK-NEXT: .long $tryMap$use_CxxFrameHandler3 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll b/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll index 944ffab24a5d1..785c2606186a6 100644 --- a/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll +++ b/llvm/test/CodeGen/X86/windows-seh-EHa-CppCatchDotDotDot.ll @@ -1,10 +1,10 @@ ; RUN: llc -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: "$cppxdata$?crash@@YAXH@Z": -; CHECK: .long ("$stateUnwindMap$?crash@@YAXH@Z") -; CHECK: .long ("$tryMap$?crash@@YAXH@Z")@IMGREL # TryBlockMap +; CHECK: .long "$stateUnwindMap$?crash@@YAXH@Z" +; CHECK: .long "$tryMap$?crash@@YAXH@Z"@IMGREL # TryBlockMap ; CHECK-NEXT: .long 6 # IPMapEntries -; CHECK-NEXT: .long ("$ip2state$?crash@@YAXH@Z") +; CHECK-NEXT: .long "$ip2state$?crash@@YAXH@Z" ; CHECK-LABEL: "$stateUnwindMap$?crash@@YAXH@Z": ; CHECK-NEXT: .long -1 @@ -19,7 +19,7 @@ ; CHECK-NEXT: .long 1 ; CHECK-NEXT: .long 2 ; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long ("$handlerMap$ +; CHECK-NEXT: .long "$handlerMap$ ; CHECK: "$handlerMap$0$?crash@@YAXH@Z" ; CHECK-NEXT: .long 0 diff --git a/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll b/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll 
index 54c1d838a30fd..6c6e9c3b66804 100644 --- a/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll +++ b/llvm/test/CodeGen/X86/windows-seh-EHa-CppDtors01.ll @@ -1,8 +1,8 @@ ; RUN: llc -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: "$cppxdata$?crash@@YAXH@Z": -; CHECK: .long ("$stateUnwindMap$?crash@@YAXH@Z") -; CHECK: .long ("$ip2state$?crash@@YAXH@Z") +; CHECK: .long "$stateUnwindMap$?crash@@YAXH@Z" +; CHECK: .long "$ip2state$?crash@@YAXH@Z" ; CHECK-LABEL: "$stateUnwindMap$?crash@@YAXH@Z": ; CHECK: .long -1 diff --git a/llvm/test/CodeGen/X86/wineh-coreclr.ll b/llvm/test/CodeGen/X86/wineh-coreclr.ll index d30f14e272fcb..baf5eaa29d281 100644 --- a/llvm/test/CodeGen/X86/wineh-coreclr.ll +++ b/llvm/test/CodeGen/X86/wineh-coreclr.ll @@ -166,9 +166,9 @@ tail: ; Clause 1: call f(2) is guarded by catch1 ; CHECK-NEXT: .long 0 ; ^ flags (0 => catch handler) -; CHECK-NEXT: .long ([[test1_before_f2]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_before_f2]]-[[test1_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test1_after_f2]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_after_f2]]-[[test1_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test1_catch1]]-[[test1_begin]] ; ^ offset of start of handler @@ -179,9 +179,9 @@ tail: ; Clause 2: call f(2) is also guarded by catch2 ; CHECK-NEXT: .long 0 ; ^ flags (0 => catch handler) -; CHECK-NEXT: .long ([[test1_before_f2]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_before_f2]]-[[test1_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test1_after_f2]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_after_f2]]-[[test1_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test1_catch2]]-[[test1_begin]] ; ^ offset of start of handler @@ -192,9 +192,9 @@ tail: ; Clause 3: calls f(1) and f(2) are guarded by finally ; CHECK-NEXT: .long 2 ; ^ flags (2 => finally handler) -; CHECK-NEXT: .long ([[test1_before_f1]]-[[test1_begin]])+1 +; CHECK-NEXT: .long 
[[test1_before_f1]]-[[test1_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test1_after_f2]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_after_f2]]-[[test1_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test1_finally]]-[[test1_begin]] ; ^ offset of start of handler @@ -208,9 +208,9 @@ tail: ; is the main function, not that funclet. ; CHECK-NEXT: .long 10 ; ^ flags (2 => finally handler | 8 => duplicate) -; CHECK-NEXT: .long ([[test1_before_f3]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_before_f3]]-[[test1_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test1_after_f3]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_after_f3]]-[[test1_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test1_finally]]-[[test1_begin]] ; ^ offset of start of handler @@ -221,9 +221,9 @@ tail: ; Clause 5: call f(5) is guarded by fault ; CHECK-NEXT: .long 4 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test1_before_f5]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_before_f5]]-[[test1_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test1_after_f5]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_after_f5]]-[[test1_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test1_fault]]-[[test1_begin]] ; ^ offset of start of handler @@ -237,9 +237,9 @@ tail: ; is the main function, not that funclet. ; CHECK-NEXT: .long 10 ; ^ flags (2 => finally handler | 8 => duplicate) -; CHECK-NEXT: .long ([[test1_before_f4]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_before_f4]]-[[test1_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test1_after_f5]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_after_f5]]-[[test1_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test1_finally]]-[[test1_begin]] ; ^ offset of start of handler @@ -253,9 +253,9 @@ tail: ; is the main function, not that funclet. 
; CHECK-NEXT: .long 10 ; ^ flags (2 => finally handler | 8 => duplicate) -; CHECK-NEXT: .long ([[test1_before_f6]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_before_f6]]-[[test1_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test1_after_f6]]-[[test1_begin]])+1 +; CHECK-NEXT: .long [[test1_after_f6]]-[[test1_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test1_finally]]-[[test1_begin]] ; ^ offset of start of handler @@ -343,9 +343,9 @@ unreachable: ; Clause 1: call f(1) is guarded by fault ; CHECK-NEXT: .long 4 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test2_before_f1]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_before_f1]]-[[test2_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test2_after_f1]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_after_f1]]-[[test2_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test2_fault]]-[[test2_begin]] ; ^ offset of start of handler @@ -356,9 +356,9 @@ unreachable: ; Clause 2: call f(1) is also guarded by catch2 ; CHECK-NEXT: .long 0 ; ^ flags (0 => catch handler) -; CHECK-NEXT: .long ([[test2_before_f1]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_before_f1]]-[[test2_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test2_after_f1]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_after_f1]]-[[test2_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test2_catch2]]-[[test2_begin]] ; ^ offset of start of handler @@ -369,9 +369,9 @@ unreachable: ; Clause 3: calls f(2) is guarded by catch1 ; CHECK-NEXT: .long 0 ; ^ flags (0 => catch handler) -; CHECK-NEXT: .long ([[test2_before_f2]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_before_f2]]-[[test2_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test2_after_f2]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_after_f2]]-[[test2_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test2_catch1]]-[[test2_begin]] ; ^ offset of start of handler @@ -385,9 
+385,9 @@ unreachable: ; is the main function, not that funclet. ; CHECK-NEXT: .long 8 ; ^ flags (0 => catch handler | 8 => duplicate) -; CHECK-NEXT: .long ([[test2_before_f2]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_before_f2]]-[[test2_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test2_after_f2]]-[[test2_begin]])+1 +; CHECK-NEXT: .long [[test2_after_f2]]-[[test2_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test2_catch2]]-[[test2_begin]] ; ^ offset of start of handler @@ -559,9 +559,9 @@ unreachable: ; Clause 1: call f(1) is guarded by fault1 ; CHECK-NEXT: .long 4 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test3_before_f1]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f1]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f1]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f1]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault1]]-[[test3_begin]] ; ^ offset of start of handler @@ -572,9 +572,9 @@ unreachable: ; Clause 3: call f(6) is guarded by catch1 ; CHECK-NEXT: .long 0 ; ^ flags (0 => catch handler) -; CHECK-NEXT: .long ([[test3_before_f6]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f6]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f6]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f6]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_catch1]]-[[test3_begin]] ; ^ offset of start of handler @@ -585,9 +585,9 @@ unreachable: ; Clause 3: call f(6) is also guarded by catch2 ; CHECK-NEXT: .long 0 ; ^ flags (0 => catch handler) -; CHECK-NEXT: .long ([[test3_before_f6]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f6]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f6]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f6]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long 
[[test3_catch2]]-[[test3_begin]] ; ^ offset of start of handler @@ -601,9 +601,9 @@ unreachable: ; is fault1, not that funclet. ; CHECK-NEXT: .long 12 ; ^ flags (4 => fault handler | 8 => duplicate) -; CHECK-NEXT: .long ([[test3_before_f7]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f7]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f7]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f7]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault5]]-[[test3_begin]] ; ^ offset of start of handler @@ -614,9 +614,9 @@ unreachable: ; Clause 5: call f(4) is guarded by fault4 ; CHECK-NEXT: .long 4 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test3_before_f4]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f4]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f4]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f4]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault4]]-[[test3_begin]] ; ^ offset of start of handler @@ -630,9 +630,9 @@ unreachable: ; is fault1, not that funclet. 
; CHECK-NEXT: .long 12 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test3_before_f4]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f4]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f4]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f4]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault5]]-[[test3_begin]] ; ^ offset of start of handler @@ -643,9 +643,9 @@ unreachable: ; Clause 7: call f(3) is guarded by fault3 ; CHECK-NEXT: .long 4 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test3_before_f3]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f3]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f3]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f3]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault3]]-[[test3_begin]] ; ^ offset of start of handler @@ -659,9 +659,9 @@ unreachable: ; is fault1, not that funclet. 
; CHECK-NEXT: .long 12 ; ^ flags (4 => fault handler | 8 => duplicate) -; CHECK-NEXT: .long ([[test3_before_f3]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f3]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f3]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f3]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault5]]-[[test3_begin]] ; ^ offset of start of handler @@ -672,9 +672,9 @@ unreachable: ; Clause 9: call f(2) is guarded by fault2 ; CHECK-NEXT: .long 4 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test3_before_f2]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f2]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f2]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f2]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault2]]-[[test3_begin]] ; ^ offset of start of handler @@ -685,9 +685,9 @@ unreachable: ; Clause 10: call f(2) is guarded by fault5 ; CHECK-NEXT: .long 4 ; ^ flags (4 => fault handler) -; CHECK-NEXT: .long ([[test3_before_f2]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_before_f2]]-[[test3_begin]]+1 ; ^ offset of start of clause -; CHECK-NEXT: .long ([[test3_after_f2]]-[[test3_begin]])+1 +; CHECK-NEXT: .long [[test3_after_f2]]-[[test3_begin]]+1 ; ^ offset of end of clause ; CHECK-NEXT: .long [[test3_fault5]]-[[test3_begin]] ; ^ offset of start of handler diff --git a/llvm/test/CodeGen/X86/x86-64-plt-relative-reloc.ll b/llvm/test/CodeGen/X86/x86-64-plt-relative-reloc.ll index f949c83efd03f..54736c94af248 100644 --- a/llvm/test/CodeGen/X86/x86-64-plt-relative-reloc.ll +++ b/llvm/test/CodeGen/X86/x86-64-plt-relative-reloc.ll @@ -13,7 +13,7 @@ declare void @fn3() @global4 = external unnamed_addr global i8 ; CHECK: .long 0 -; CHECK-NEXT: .long (fn1@PLT-vtable)-4 -; CHECK-NEXT: .long (fn2@PLT-vtable)-4 -; CHECK-NEXT: .long (fn3-vtable)-4 -; CHECK-NEXT: .long (global4-vtable)-4 +; 
CHECK-NEXT: .long fn1@PLT-vtable-4 +; CHECK-NEXT: .long fn2@PLT-vtable-4 +; CHECK-NEXT: .long fn3-vtable-4 +; CHECK-NEXT: .long global4-vtable-4 diff --git a/llvm/test/CodeGen/X86/x86-plt-relative-reloc.ll b/llvm/test/CodeGen/X86/x86-plt-relative-reloc.ll index 8c86cd29d1c81..d5e80285b160d 100644 --- a/llvm/test/CodeGen/X86/x86-plt-relative-reloc.ll +++ b/llvm/test/CodeGen/X86/x86-plt-relative-reloc.ll @@ -11,6 +11,6 @@ declare void @fn2() unnamed_addr declare void @fn3() ; CHECK: .long 0 -; CHECK-NEXT: .long (fn1@PLT-vtable)-4 -; CHECK-NEXT: .long (fn2@PLT-vtable)-4 -; CHECK-NEXT: .long (fn3-vtable)-4 +; CHECK-NEXT: .long fn1@PLT-vtable-4 +; CHECK-NEXT: .long fn2@PLT-vtable-4 +; CHECK-NEXT: .long fn3-vtable-4 diff --git a/llvm/test/DebugInfo/COFF/jump-table.ll b/llvm/test/DebugInfo/COFF/jump-table.ll index a8039809c8b77..3eda2438ea88a 100644 --- a/llvm/test/DebugInfo/COFF/jump-table.ll +++ b/llvm/test/DebugInfo/COFF/jump-table.ll @@ -58,7 +58,7 @@ ; CHECK: {{\.?}}LJTI0_0: ; I686-NEXT: .long LBB0_[[#]] ; X64-NEXT: .long .LBB0_[[#]]-.LJTI0_0 -; A32-NEXT: .byte (($MBB0_[[#]])-(.LCPI0_0+4))/2 +; A32-NEXT: .byte ($MBB0_[[#]]-(.LCPI0_0+4))/2 ; A64-NEXT: .byte (.LBB0_[[FIRSTBLOCK:[0-9]+]]-.LBB0_[[FIRSTBLOCK]])>>2 ; NOTE: thumbv7a places the jump tables just after the branch, so check for the other branch now ; A32: .LCPI0_1: @@ -66,7 +66,7 @@ ; CHECK: {{\.?}}LJTI0_1: ; I686-NEXT: .long LBB0_[[#]] ; X64-NEXT: .long .LBB0_[[#]]-.LJTI0_1 -; A32-NEXT: .byte (($MBB0_[[#]])-(.LCPI0_1+4))/2 +; A32-NEXT: .byte ($MBB0_[[#]]-(.LCPI0_1+4))/2 ; A64-NEXT: .byte (.LBB0_[[SECONDBLOCK:[0-9]+]]-.LBB0_[[SECONDBLOCK]])>>2 ; Verify CodeView diff --git a/llvm/test/MC/AArch64/elf-reloc-ptrauth.s b/llvm/test/MC/AArch64/elf-reloc-ptrauth.s index 057b298a0a0df..bed85bcc5798b 100644 --- a/llvm/test/MC/AArch64/elf-reloc-ptrauth.s +++ b/llvm/test/MC/AArch64/elf-reloc-ptrauth.s @@ -91,25 +91,25 @@ _g9: .quad ("_g 7" + 7)@AUTH(ia,16) .quad 0 -// ASM: .xword (_g9@AUTH(ia,42))-(_g8@AUTH(ia,42)) +// 
ASM: .xword _g9@AUTH(ia,42)-_g8@AUTH(ia,42) .quad _g9@AUTH(ia,42) - _g8@AUTH(ia,42) .quad 0 .ifdef ASMONLY -// ASM: .xword (_g10@AUTH(ia,42))+1 +// ASM: .xword _g10@AUTH(ia,42)+1 .quad _g10@AUTH(ia,42) + 1 -// ASM: .xword 1+(_g11@AUTH(ia,42)) +// ASM: .xword 1+_g11@AUTH(ia,42) .quad 1 + _g11@AUTH(ia,42) -// ASM: .xword (1+(_g12@AUTH(ia,42)))+1 +// ASM: .xword 1+_g12@AUTH(ia,42)+1 .quad 1 + _g12@AUTH(ia,42) + 1 -// ASM: .xword (_g13@AUTH(ia,42))+(_g14@AUTH(ia,42)) +// ASM: .xword _g13@AUTH(ia,42)+_g14@AUTH(ia,42) .quad _g13@AUTH(ia,42) + _g14@AUTH(ia,42) -// ASM: .xword (_g9@AUTH(ia,42))-_g8 +// ASM: .xword _g9@AUTH(ia,42)-_g8 .quad _g9@AUTH(ia,42) - _g8 .quad 0 diff --git a/llvm/test/MC/AMDGPU/expressions.s b/llvm/test/MC/AMDGPU/expressions.s index f917347a3bd79..d0ef0d5f93736 100644 --- a/llvm/test/MC/AMDGPU/expressions.s +++ b/llvm/test/MC/AMDGPU/expressions.s @@ -269,8 +269,8 @@ BB1: v_nop_e64 BB2: s_sub_u32 vcc_lo, vcc_lo, (BB2+4)-BB1 -// VI: s_sub_u32 vcc_lo, vcc_lo, (BB2+4)-BB1 ; encoding: [0x6a,0xff,0xea,0x80,A,A,A,A] -// VI-NEXT: ; fixup A - offset: 4, value: (BB2+4)-BB1, kind: FK_Data_4 +// VI: s_sub_u32 vcc_lo, vcc_lo, BB2+4-BB1 ; encoding: [0x6a,0xff,0xea,0x80,A,A,A,A] +// VI-NEXT: ; fixup A - offset: 4, value: BB2+4-BB1, kind: FK_Data_4 s_add_u32 vcc_lo, vcc_lo, (BB2-BB1)&4294967295 // VI: s_add_u32 vcc_lo, vcc_lo, (BB2-BB1)&4294967295 ; encoding: [0x6a,0xff,0x6a,0x80,A,A,A,A] // VI-NEXT: ; fixup A - offset: 4, value: (BB2-BB1)&4294967295, kind: FK_Data_4 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s index 7f5240d649b7f..ac06e6177d321 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s @@ -115,35 +115,35 @@ expr_defined: // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 // ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 
((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&6144)>>11 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: 
.amdhsa_reserve_flat_scratch defined_boolean // ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18 -// ASM-NEXT: .amdhsa_dx10_clamp 
(((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&8388608)>>23 -// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26 -// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29 -// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 
8))/8)-1))&(~960))&1073741824)>>30 -// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 
(((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered 
(((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&2147483648)>>31 // ASM-NEXT: .amdhsa_shared_vgpr_count 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1073741824)>>30 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s 
b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s index b5b8a58b09a7f..8490f9bde2425 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s @@ -113,34 +113,34 @@ expr_defined: // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 // ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_enable_private_segment ((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&6144)>>11 +// ASM-NEXT: .amdhsa_enable_private_segment ((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&1 +// 
ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&1024)>>10 +// ASM-NEXT: 
.amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 
(((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18 -// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&8388608)>>23 -// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26 -// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 
8))/8)-1))&(~960))&536870912)>>29 -// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30 -// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((3769368576|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 
(((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&786432)>>18 +// ASM-NEXT: .amdhsa_dx10_clamp (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 
(((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((3769368576|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&2147483648)>>31 // ASM-NEXT: .amdhsa_shared_vgpr_count 0 // ASM-NEXT: .amdhsa_inst_pref_size 0 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1073741824)>>30 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s index 0efd323ae9a34..ab1a5891ab22c 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s @@ -117,32 +117,32 @@ expr_defined: // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 // ASM-NEXT: .amdhsa_wavefront_size32 1 -// ASM-NEXT: .amdhsa_enable_private_segment 
((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&6144)>>11 +// ASM-NEXT: .amdhsa_enable_private_segment ((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&1 +// 
ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&1024)>>10 +// ASM-NEXT: 
.amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean -// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&196608)>>16 -// 
ASM-NEXT: .amdhsa_float_denorm_mode_16_64 (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&786432)>>18 -// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&67108864)>>26 -// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29 -// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30 -// ASM-NEXT: .amdhsa_forward_progress 
(((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31 +// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 (((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 
(((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&786432)>>18 +// ASM-NEXT: .amdhsa_fp16_overflow (((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&67108864)>>26 +// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&536870912)>>29 +// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&1073741824)>>30 +// ASM-NEXT: .amdhsa_forward_progress 
(((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&2147483648)>>31 // ASM-NEXT: .amdhsa_inst_pref_size (((defined_value+6)<<4)&4080)>>4 -// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((3758882816|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25 -// ASM-NEXT: 
.amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&1073741824)>>30 +// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((3758882816|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~67108864)|(defined_boolean<<26))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~2147483648)|(defined_boolean<<31))&~2097152)|(defined_boolean<<21))&~63)|((alignto(max(defined_value+4, 1), 8)/8)-1))&~960)&2097152)>>21 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~1)|defined_boolean)&~62)&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s index 485f48c695c4d..9fae37c38735f 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s @@ -106,29 +106,29 @@ expr_defined: // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&6144)>>11 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean -// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 
((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 0)), 1), 8))/8)-1)<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 0), 1), 8)/8)-1)<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 0), 1), 8)/8)-1)<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 
4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 0), 1), 8)/8)-1)<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 0), 1), 8)/8)-1)<<6))&786432)>>18 // ASM-NEXT: .amdhsa_dx10_clamp 1 // ASM-NEXT: .amdhsa_ieee_mode 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1073741824)>>30 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s 
b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s index 0d2e066113ee8..4b6cb01c18d8f 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s @@ -107,30 +107,30 @@ expr_defined: // ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0 // ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&128)>>7 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&6144)>>11 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&128)>>7 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_value+4 // ASM-NEXT: .amdhsa_next_free_sgpr defined_value+5 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean // ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&196608)>>16 -// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~63))|(((alignto(max(defined_value+4, 1), 
4))/4)-1))&(~960))|((((alignto(max((defined_value+5)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&786432)>>18 +// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 ((((((((((((11272192|(defined_2_bits<<12))&~49152)|(defined_2_bits<<14))&~196608)|(defined_2_bits<<16))&~786432)|(defined_2_bits<<18))&~63)|((alignto(max(defined_value+4, 1), 4)/4)-1))&~960)|(((alignto(max(defined_value+5+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&786432)>>18 // ASM-NEXT: .amdhsa_dx10_clamp 1 // ASM-NEXT: .amdhsa_ieee_mode 1 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&33554432)>>25 -// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))&1073741824)>>30 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((((((((((((((((((((((((128|(defined_2_bits<<11))&~128)|(defined_boolean<<7))&~256)|(defined_boolean<<8))&~512)|(defined_boolean<<9))&~1024)|(defined_boolean<<10))&~16777216)|(defined_boolean<<24))&~33554432)|(defined_boolean<<25))&~67108864)|(defined_boolean<<26))&~134217728)|(defined_boolean<<27))&~268435456)|(defined_boolean<<28))&~536870912)|(defined_boolean<<29))&~1073741824)|(defined_boolean<<30))&~62)&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_value, 41 diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s index 88b5e23a6f2c5..4b750d4d0fcf6 100644 --- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s +++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s @@ -80,33 +80,33 @@ expr_defined: // ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 0 // ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0 -// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((128|defined_boolean)&(~62))&1 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset ((128|defined_boolean)&~62)&1 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((128|defined_boolean)&(~62))&256)>>8 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((128|defined_boolean)&(~62))&512)>>9 -// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((128|defined_boolean)&(~62))&1024)>>10 -// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 
(((128|defined_boolean)&(~62))&6144)>>11 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((128|defined_boolean)&~62)&256)>>8 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((128|defined_boolean)&~62)&512)>>9 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((128|defined_boolean)&~62)&1024)>>10 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((128|defined_boolean)&~62)&6144)>>11 // ASM-NEXT: .amdhsa_next_free_vgpr defined_boolean+1 // ASM-NEXT: .amdhsa_next_free_sgpr defined_boolean+2 // ASM-NEXT: .amdhsa_accum_offset 4 // ASM-NEXT: .amdhsa_reserve_vcc defined_boolean // ASM-NEXT: .amdhsa_reserve_flat_scratch defined_boolean // ASM-NEXT: .amdhsa_reserve_xnack_mask 1 -// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&12288)>>12 -// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&49152)>>14 -// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&196608)>>16 +// ASM-NEXT: .amdhsa_float_round_mode_32 ((((((((((9175040|(defined_boolean<<21))&~8388608)|(defined_boolean<<23))&~67108864)|(defined_boolean<<26))&~63)|((alignto(max(defined_boolean+1, 1), 8)/8)-1))&~960)|(((alignto(max(defined_boolean+2+extrasgprs(defined_boolean, defined_boolean, 1), 1), 
8)/8)-1)<<6))&12288)>>12 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 ((((((((((9175040|(defined_boolean<<21))&~8388608)|(defined_boolean<<23))&~67108864)|(defined_boolean<<26))&~63)|((alignto(max(defined_boolean+1, 1), 8)/8)-1))&~960)|(((alignto(max(defined_boolean+2+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&49152)>>14 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 ((((((((((9175040|(defined_boolean<<21))&~8388608)|(defined_boolean<<23))&~67108864)|(defined_boolean<<26))&~63)|((alignto(max(defined_boolean+1, 1), 8)/8)-1))&~960)|(((alignto(max(defined_boolean+2+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&196608)>>16 // ASM-NEXT: .amdhsa_float_denorm_mode_16_64 3 -// ASM-NEXT: .amdhsa_dx10_clamp ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&2097152)>>21 -// ASM-NEXT: .amdhsa_ieee_mode ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&8388608)>>23 -// ASM-NEXT: .amdhsa_fp16_overflow ((((((((((9175040|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(((alignto(max(defined_boolean+1, 1), 8))/8)-1))&(~960))|((((alignto(max((defined_boolean+2)+(extrasgprs(defined_boolean, defined_boolean, 1)), 1), 8))/8)-1)<<6))&67108864)>>26 -// ASM-NEXT: .amdhsa_tg_split (((defined_boolean<<16)&(~63))&65536)>>16 -// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((128|defined_boolean)&(~62))&16777216)>>24 -// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((128|defined_boolean)&(~62))&33554432)>>25 -// ASM-NEXT: 
.amdhsa_exception_fp_ieee_div_zero (((128|defined_boolean)&(~62))&67108864)>>26 -// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((128|defined_boolean)&(~62))&134217728)>>27 -// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((128|defined_boolean)&(~62))&268435456)>>28 -// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact (((128|defined_boolean)&(~62))&536870912)>>29 -// ASM-NEXT: .amdhsa_exception_int_div_zero (((128|defined_boolean)&(~62))&1073741824)>>30 +// ASM-NEXT: .amdhsa_dx10_clamp ((((((((((9175040|(defined_boolean<<21))&~8388608)|(defined_boolean<<23))&~67108864)|(defined_boolean<<26))&~63)|((alignto(max(defined_boolean+1, 1), 8)/8)-1))&~960)|(((alignto(max(defined_boolean+2+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&2097152)>>21 +// ASM-NEXT: .amdhsa_ieee_mode ((((((((((9175040|(defined_boolean<<21))&~8388608)|(defined_boolean<<23))&~67108864)|(defined_boolean<<26))&~63)|((alignto(max(defined_boolean+1, 1), 8)/8)-1))&~960)|(((alignto(max(defined_boolean+2+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&8388608)>>23 +// ASM-NEXT: .amdhsa_fp16_overflow ((((((((((9175040|(defined_boolean<<21))&~8388608)|(defined_boolean<<23))&~67108864)|(defined_boolean<<26))&~63)|((alignto(max(defined_boolean+1, 1), 8)/8)-1))&~960)|(((alignto(max(defined_boolean+2+extrasgprs(defined_boolean, defined_boolean, 1), 1), 8)/8)-1)<<6))&67108864)>>26 +// ASM-NEXT: .amdhsa_tg_split (((defined_boolean<<16)&~63)&65536)>>16 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((128|defined_boolean)&~62)&16777216)>>24 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((128|defined_boolean)&~62)&33554432)>>25 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((128|defined_boolean)&~62)&67108864)>>26 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow (((128|defined_boolean)&~62)&134217728)>>27 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow (((128|defined_boolean)&~62)&268435456)>>28 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 
(((128|defined_boolean)&~62)&536870912)>>29 +// ASM-NEXT: .amdhsa_exception_int_div_zero (((128|defined_boolean)&~62)&1073741824)>>30 // ASM-NEXT: .end_amdhsa_kernel // ASM: .set defined_boolean, 1 diff --git a/llvm/test/MC/AMDGPU/mcexpr_amd.s b/llvm/test/MC/AMDGPU/mcexpr_amd.s index a9639c3acc305..d7340bb5fd2ed 100644 --- a/llvm/test/MC/AMDGPU/mcexpr_amd.s +++ b/llvm/test/MC/AMDGPU/mcexpr_amd.s @@ -74,7 +74,7 @@ .set max_neg_number, max(neg_one) // ASM: .set max_with_subexpr, 3 -// ASM: .set max_as_subexpr, 1+(max(4, 3, five)) +// ASM: .set max_as_subexpr, 1+max(4, 3, five) // ASM: .set max_recursive_subexpr, max(max(1, four), 3, max_expression_all) .set max_with_subexpr, max(((one | 3) << 3) / 8) @@ -112,7 +112,7 @@ .set or_with_or_sym, or(or, 4, 3, one, two) // ASM: .set or_with_subexpr, 3 -// ASM: .set or_as_subexpr, 1+(or(4, 3, five)) +// ASM: .set or_as_subexpr, 1+or(4, 3, five) // ASM: .set or_recursive_subexpr, or(or(1, four), 3, or_expression_all) .set or_with_subexpr, or(((one | 3) << 3) / 8) diff --git a/llvm/test/MC/ARM/arm-branches.s b/llvm/test/MC/ARM/arm-branches.s index e18fa5de12584..5af5a28612ac1 100644 --- a/llvm/test/MC/ARM/arm-branches.s +++ b/llvm/test/MC/ARM/arm-branches.s @@ -28,13 +28,13 @@ bl $4 beq $4 + 4 -@ CHECK: b ($foo) @ encoding: [A,A,A,0xea] -@ CHECK: bl ($foo) @ encoding: [A,A,A,0xeb] -@ CHECK: beq ($foo) @ encoding: [A,A,A,0x0a] -@ CHECK: blx ($foo) @ encoding: [A,A,A,0xfa] -@ CHECK: b #($foo)+4 @ encoding: [A,A,A,0xea] -@ CHECK: bl ($4) @ encoding: [A,A,A,0xeb] -@ CHECK: beq #($4)+4 @ encoding: [A,A,A,0x0a] +@ CHECK: b $foo @ encoding: [A,A,A,0xea] +@ CHECK: bl $foo @ encoding: [A,A,A,0xeb] +@ CHECK: beq $foo @ encoding: [A,A,A,0x0a] +@ CHECK: blx $foo @ encoding: [A,A,A,0xfa] +@ CHECK: b #$foo+4 @ encoding: [A,A,A,0xea] +@ CHECK: bl $4 @ encoding: [A,A,A,0xeb] +@ CHECK: beq #$4+4 @ encoding: [A,A,A,0x0a] @------------------------------------------------------------------------------ @ Leading '$' should be allowed to introduce 
an expression diff --git a/llvm/test/MC/ARM/basic-arm-instructions.s b/llvm/test/MC/ARM/basic-arm-instructions.s index 9f3a5cd4afa79..4c62e8f34c3cf 100644 --- a/llvm/test/MC/ARM/basic-arm-instructions.s +++ b/llvm/test/MC/ARM/basic-arm-instructions.s @@ -289,8 +289,8 @@ Lforward: @ CHECK: addseq r0, pc, #-1073741824 @ encoding: [0x03,0x01,0x9f,0x02] @ CHECK: Ltmp0: @ CHECK-NEXT: Ltmp1: -@ CHECK-NEXT: adr r0, (Ltmp1+8)+(Lback-Ltmp0) @ encoding: [A,A,0x0f'A',0xe2'A'] -@ CHECK-NEXT: @ fixup A - offset: 0, value: (Ltmp1+8)+(Lback-Ltmp0), kind: fixup_arm_adr_pcrel_12 +@ CHECK-NEXT: adr r0, Ltmp1+8+(Lback-Ltmp0) @ encoding: [A,A,0x0f'A',0xe2'A'] +@ CHECK-NEXT: @ fixup A - offset: 0, value: Ltmp1+8+(Lback-Ltmp0), kind: fixup_arm_adr_pcrel_12 @ Test right shift by 32, which is encoded as 0 add r3, r1, r2, lsr #32 diff --git a/llvm/test/MC/ARM/elf-movt.s b/llvm/test/MC/ARM/elf-movt.s index 72dad26d06664..3a9f162f7f842 100644 --- a/llvm/test/MC/ARM/elf-movt.s +++ b/llvm/test/MC/ARM/elf-movt.s @@ -26,8 +26,8 @@ bar: @ ASM-NEXT: movt r0, :upper16:(GOT-(.LPC0_2+8)) @ ASM: movw r0, :lower16:(extern_symbol+1234) @ ASM-NEXT: movt r0, :upper16:(extern_symbol+1234) -@ ASM: movw r0, :lower16:((foo-bar)+1234) -@ ASM-NEXT: movt r0, :upper16:((foo-bar)+1234) +@ ASM: movw r0, :lower16:(foo-bar+1234) +@ ASM-NEXT: movt r0, :upper16:(foo-bar+1234) @OBJ: Disassembly of section .text: @OBJ-EMPTY: diff --git a/llvm/test/MC/ARM/macho-word-reloc-thumb.s b/llvm/test/MC/ARM/macho-word-reloc-thumb.s index bd98f6b33f974..a76684d35d83a 100644 --- a/llvm/test/MC/ARM/macho-word-reloc-thumb.s +++ b/llvm/test/MC/ARM/macho-word-reloc-thumb.s @@ -4,7 +4,7 @@ @ ARM relocatable object files try to look like they're pre-linked, so the @ offsets in the instructions are a best-guess. I suspect the "-3" should b -@ CHECK: movw r1, :lower16:((_bar-8)-3) +@ CHECK: movw r1, :lower16:(_bar-8-3) @ [...] 
@ CHECK: .long {{[0-9]*[13579]}} diff --git a/llvm/test/MC/AVR/inst-brbc.s b/llvm/test/MC/AVR/inst-brbc.s index 4edbbfd858024..6aa6ed0863e0c 100644 --- a/llvm/test/MC/AVR/inst-brbc.s +++ b/llvm/test/MC/AVR/inst-brbc.s @@ -10,11 +10,11 @@ foo: .short 0xf74c .short 0xf4c7 -; CHECK: brvc (.Ltmp0+8)+2 ; encoding: [0bAAAAA011,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel +; CHECK: brvc .Ltmp0+8+2 ; encoding: [0bAAAAA011,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+8+2, kind: fixup_7_pcrel ; -; CHECK: brcc (.Ltmp1-16)+2 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-16)+2, kind: fixup_7_pcrel +; CHECK: brcc .Ltmp1-16+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1-16+2, kind: fixup_7_pcrel ; INST-LABEL: : ; INST-NEXT: 23 f4 brvc .+8 diff --git a/llvm/test/MC/AVR/inst-brbs.s b/llvm/test/MC/AVR/inst-brbs.s index 3f4b134aef682..abadd10a134f8 100644 --- a/llvm/test/MC/AVR/inst-brbs.s +++ b/llvm/test/MC/AVR/inst-brbs.s @@ -10,10 +10,10 @@ foo: .short 0xf34c .short 0xf077 -; CHECK: brvs (.Ltmp0+8)+2 ; encoding: [0bAAAAA011,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel -; CHECK: brcs (.Ltmp1-12)+2 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-12)+2, kind: fixup_7_pcrel +; CHECK: brvs .Ltmp0+8+2 ; encoding: [0bAAAAA011,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+8+2, kind: fixup_7_pcrel +; CHECK: brcs .Ltmp1-12+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1-12+2, kind: fixup_7_pcrel ; INST-LABEL: : ; INST-NEXT: 23 f0 brvs .+8 diff --git a/llvm/test/MC/AVR/inst-brcc.s b/llvm/test/MC/AVR/inst-brcc.s index dd1b2b11a6d30..90d7dc3eca1b9 100644 --- a/llvm/test/MC/AVR/inst-brcc.s +++ b/llvm/test/MC/AVR/inst-brcc.s @@ -12,12 +12,12 @@ foo: bar: -; CHECK: brcc (.Ltmp0+66)+2 ; encoding: 
[0bAAAAA000,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+66)+2, kind: fixup_7_pcrel -; CHECK: brcc (.Ltmp1-22)+2 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-22)+2, kind: fixup_7_pcrel -; CHECK: brcc (.Ltmp2+66)+2 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+66)+2, kind: fixup_7_pcrel +; CHECK: brcc .Ltmp0+66+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+66+2, kind: fixup_7_pcrel +; CHECK: brcc .Ltmp1-22+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1-22+2, kind: fixup_7_pcrel +; CHECK: brcc .Ltmp2+66+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp2+66+2, kind: fixup_7_pcrel ; CHECK: brcc bar ; encoding: [0bAAAAA000,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brcs.s b/llvm/test/MC/AVR/inst-brcs.s index 3fafccdb49257..74cc9a1c97805 100644 --- a/llvm/test/MC/AVR/inst-brcs.s +++ b/llvm/test/MC/AVR/inst-brcs.s @@ -12,12 +12,12 @@ foo: bar: -; CHECK: brcs (.Ltmp0+8)+2 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel -; CHECK: brcs (.Ltmp1+4)+2 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+4)+2, kind: fixup_7_pcrel -; CHECK: brcs (.Ltmp2+8)+2 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_7_pcrel +; CHECK: brcs .Ltmp0+8+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+8+2, kind: fixup_7_pcrel +; CHECK: brcs .Ltmp1+4+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+4+2, kind: fixup_7_pcrel +; CHECK: brcs .Ltmp2+8+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp2+8+2, kind: 
fixup_7_pcrel ; CHECK: brcs bar ; encoding: [0bAAAAA000,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-breq.s b/llvm/test/MC/AVR/inst-breq.s index 7a6eac6f01ad0..51bc6192e05ed 100644 --- a/llvm/test/MC/AVR/inst-breq.s +++ b/llvm/test/MC/AVR/inst-breq.s @@ -12,12 +12,12 @@ foo: bar: -; CHECK: breq (.Ltmp0-18)+2 ; encoding: [0bAAAAA001,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-18)+2, kind: fixup_7_pcrel -; CHECK: breq (.Ltmp1-12)+2 ; encoding: [0bAAAAA001,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-12)+2, kind: fixup_7_pcrel -; CHECK: brbs 1, (.Ltmp2-18)+2 ; encoding: [0bAAAAA001,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2-18)+2, kind: fixup_7_pcrel +; CHECK: breq .Ltmp0-18+2 ; encoding: [0bAAAAA001,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0-18+2, kind: fixup_7_pcrel +; CHECK: breq .Ltmp1-12+2 ; encoding: [0bAAAAA001,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1-12+2, kind: fixup_7_pcrel +; CHECK: brbs 1, .Ltmp2-18+2 ; encoding: [0bAAAAA001,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp2-18+2, kind: fixup_7_pcrel ; CHECK: brbs 1, bar ; encoding: [0bAAAAA001,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brge.s b/llvm/test/MC/AVR/inst-brge.s index 6cf79db4dbd65..904f4a496e777 100644 --- a/llvm/test/MC/AVR/inst-brge.s +++ b/llvm/test/MC/AVR/inst-brge.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brge (.Ltmp0+50)+2 ; encoding: [0bAAAAA100,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+50)+2, kind: fixup_7_pcrel -; CHECK: brge (.Ltmp1+42)+2 ; encoding: [0bAAAAA100,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+42)+2, kind: fixup_7_pcrel +; CHECK: brge .Ltmp0+50+2 ; encoding: [0bAAAAA100,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+50+2, kind: fixup_7_pcrel +; CHECK: 
brge .Ltmp1+42+2 ; encoding: [0bAAAAA100,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+42+2, kind: fixup_7_pcrel ; CHECK: brge bar ; encoding: [0bAAAAA100,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brhc.s b/llvm/test/MC/AVR/inst-brhc.s index 924895e4bf5df..77052e664d389 100644 --- a/llvm/test/MC/AVR/inst-brhc.s +++ b/llvm/test/MC/AVR/inst-brhc.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brhc (.Ltmp0+12)+2 ; encoding: [0bAAAAA101,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+12)+2, kind: fixup_7_pcrel -; CHECK: brhc (.Ltmp1+14)+2 ; encoding: [0bAAAAA101,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+14)+2, kind: fixup_7_pcrel +; CHECK: brhc .Ltmp0+12+2 ; encoding: [0bAAAAA101,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+12+2, kind: fixup_7_pcrel +; CHECK: brhc .Ltmp1+14+2 ; encoding: [0bAAAAA101,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+14+2, kind: fixup_7_pcrel ; CHECK: brhc bar ; encoding: [0bAAAAA101,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brhs.s b/llvm/test/MC/AVR/inst-brhs.s index 9704ce5e7e5ac..b4c55cafd5de9 100644 --- a/llvm/test/MC/AVR/inst-brhs.s +++ b/llvm/test/MC/AVR/inst-brhs.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brhs (.Ltmp0-66)+2 ; encoding: [0bAAAAA101,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-66)+2, kind: fixup_7_pcrel -; CHECK: brhs (.Ltmp1+14)+2 ; encoding: [0bAAAAA101,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+14)+2, kind: fixup_7_pcrel +; CHECK: brhs .Ltmp0-66+2 ; encoding: [0bAAAAA101,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0-66+2, kind: fixup_7_pcrel +; CHECK: brhs .Ltmp1+14+2 ; encoding: [0bAAAAA101,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+14+2, kind: fixup_7_pcrel ; CHECK: brhs bar ; encoding: 
[0bAAAAA101,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brid.s b/llvm/test/MC/AVR/inst-brid.s index e03c293677887..4cf1869dc1b2c 100644 --- a/llvm/test/MC/AVR/inst-brid.s +++ b/llvm/test/MC/AVR/inst-brid.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brid (.Ltmp0+42)+2 ; encoding: [0bAAAAA111,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+42)+2, kind: fixup_7_pcrel -; CHECK: brid (.Ltmp1+62)+2 ; encoding: [0bAAAAA111,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+62)+2, kind: fixup_7_pcrel +; CHECK: brid .Ltmp0+42+2 ; encoding: [0bAAAAA111,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+42+2, kind: fixup_7_pcrel +; CHECK: brid .Ltmp1+62+2 ; encoding: [0bAAAAA111,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+62+2, kind: fixup_7_pcrel ; CHECK: brid bar ; encoding: [0bAAAAA111,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brie.s b/llvm/test/MC/AVR/inst-brie.s index 74b724b20bd9e..7c7e97c2b201e 100644 --- a/llvm/test/MC/AVR/inst-brie.s +++ b/llvm/test/MC/AVR/inst-brie.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brie (.Ltmp0+20)+2 ; encoding: [0bAAAAA111,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+20)+2, kind: fixup_7_pcrel -; CHECK: brie (.Ltmp1+40)+2 ; encoding: [0bAAAAA111,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+40)+2, kind: fixup_7_pcrel +; CHECK: brie .Ltmp0+20+2 ; encoding: [0bAAAAA111,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+20+2, kind: fixup_7_pcrel +; CHECK: brie .Ltmp1+40+2 ; encoding: [0bAAAAA111,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+40+2, kind: fixup_7_pcrel ; CHECK: brie bar ; encoding: [0bAAAAA111,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brlo.s b/llvm/test/MC/AVR/inst-brlo.s index 
2726d943e0e78..9523fd5695a99 100644 --- a/llvm/test/MC/AVR/inst-brlo.s +++ b/llvm/test/MC/AVR/inst-brlo.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brlo (.Ltmp0+12)+2 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+12)+2, kind: fixup_7_pcrel -; CHECK: brlo (.Ltmp1+28)+2 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+28)+2, kind: fixup_7_pcrel +; CHECK: brlo .Ltmp0+12+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+12+2, kind: fixup_7_pcrel +; CHECK: brlo .Ltmp1+28+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+28+2, kind: fixup_7_pcrel ; CHECK: brlo bar ; encoding: [0bAAAAA000,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brlt.s b/llvm/test/MC/AVR/inst-brlt.s index 299a873963e5b..c309310909fa7 100644 --- a/llvm/test/MC/AVR/inst-brlt.s +++ b/llvm/test/MC/AVR/inst-brlt.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brlt (.Ltmp0+16)+2 ; encoding: [0bAAAAA100,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+16)+2, kind: fixup_7_pcrel -; CHECK: brlt (.Ltmp1+2)+2 ; encoding: [0bAAAAA100,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+2)+2, kind: fixup_7_pcrel +; CHECK: brlt .Ltmp0+16+2 ; encoding: [0bAAAAA100,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+16+2, kind: fixup_7_pcrel +; CHECK: brlt .Ltmp1+2+2 ; encoding: [0bAAAAA100,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+2+2, kind: fixup_7_pcrel ; CHECK: brlt bar ; encoding: [0bAAAAA100,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brmi.s b/llvm/test/MC/AVR/inst-brmi.s index 96f7e484f465f..ec60bc4a14f1c 100644 --- a/llvm/test/MC/AVR/inst-brmi.s +++ b/llvm/test/MC/AVR/inst-brmi.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brmi (.Ltmp0+66)+2 ; encoding: 
[0bAAAAA010,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+66)+2, kind: fixup_7_pcrel -; CHECK: brmi (.Ltmp1+58)+2 ; encoding: [0bAAAAA010,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+58)+2, kind: fixup_7_pcrel +; CHECK: brmi .Ltmp0+66+2 ; encoding: [0bAAAAA010,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+66+2, kind: fixup_7_pcrel +; CHECK: brmi .Ltmp1+58+2 ; encoding: [0bAAAAA010,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+58+2, kind: fixup_7_pcrel ; CHECK: brmi bar ; encoding: [0bAAAAA010,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brne.s b/llvm/test/MC/AVR/inst-brne.s index ab89d516681d3..2a424a3593247 100644 --- a/llvm/test/MC/AVR/inst-brne.s +++ b/llvm/test/MC/AVR/inst-brne.s @@ -12,12 +12,12 @@ foo: bar: -; CHECK: brne (.Ltmp0+10)+2 ; encoding: [0bAAAAA001,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+10)+2, kind: fixup_7_pcrel -; CHECK: brne (.Ltmp1+2)+2 ; encoding: [0bAAAAA001,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+2)+2, kind: fixup_7_pcrel -; CHECK: brbc 1, (.Ltmp2+10)+2 ; encoding: [0bAAAAA001,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+10)+2, kind: fixup_7_pcrel +; CHECK: brne .Ltmp0+10+2 ; encoding: [0bAAAAA001,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+10+2, kind: fixup_7_pcrel +; CHECK: brne .Ltmp1+2+2 ; encoding: [0bAAAAA001,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+2+2, kind: fixup_7_pcrel +; CHECK: brbc 1, .Ltmp2+10+2 ; encoding: [0bAAAAA001,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp2+10+2, kind: fixup_7_pcrel ; CHECK: brbc 1, bar ; encoding: [0bAAAAA001,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brpl.s b/llvm/test/MC/AVR/inst-brpl.s index cd2f697ae8f20..d752f34ee606b 100644 --- 
a/llvm/test/MC/AVR/inst-brpl.s +++ b/llvm/test/MC/AVR/inst-brpl.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brpl (.Ltmp0-12)+2 ; encoding: [0bAAAAA010,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-12)+2, kind: fixup_7_pcrel -; CHECK: brpl (.Ltmp1+18)+2 ; encoding: [0bAAAAA010,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+18)+2, kind: fixup_7_pcrel +; CHECK: brpl .Ltmp0-12+2 ; encoding: [0bAAAAA010,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0-12+2, kind: fixup_7_pcrel +; CHECK: brpl .Ltmp1+18+2 ; encoding: [0bAAAAA010,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+18+2, kind: fixup_7_pcrel ; CHECK: brpl bar ; encoding: [0bAAAAA010,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brsh.s b/llvm/test/MC/AVR/inst-brsh.s index b066c917f72ae..95a6a52acb60c 100644 --- a/llvm/test/MC/AVR/inst-brsh.s +++ b/llvm/test/MC/AVR/inst-brsh.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brsh (.Ltmp0+32)+2 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+32)+2, kind: fixup_7_pcrel -; CHECK: brsh (.Ltmp1+70)+2 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+70)+2, kind: fixup_7_pcrel +; CHECK: brsh .Ltmp0+32+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+32+2, kind: fixup_7_pcrel +; CHECK: brsh .Ltmp1+70+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+70+2, kind: fixup_7_pcrel ; CHECK: brsh bar ; encoding: [0bAAAAA000,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brtc.s b/llvm/test/MC/AVR/inst-brtc.s index 64421df10baf5..d8704dc6f345d 100644 --- a/llvm/test/MC/AVR/inst-brtc.s +++ b/llvm/test/MC/AVR/inst-brtc.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brtc (.Ltmp0+52)+2 ; encoding: [0bAAAAA110,0b111101AA] -; CHECK-NEXT: ; 
fixup A - offset: 0, value: (.Ltmp0+52)+2, kind: fixup_7_pcrel -; CHECK: brtc (.Ltmp1+50)+2 ; encoding: [0bAAAAA110,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+50)+2, kind: fixup_7_pcrel +; CHECK: brtc .Ltmp0+52+2 ; encoding: [0bAAAAA110,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+52+2, kind: fixup_7_pcrel +; CHECK: brtc .Ltmp1+50+2 ; encoding: [0bAAAAA110,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+50+2, kind: fixup_7_pcrel ; CHECK: brtc bar ; encoding: [0bAAAAA110,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brts.s b/llvm/test/MC/AVR/inst-brts.s index bb02b6f3d475d..976f23ff8c208 100644 --- a/llvm/test/MC/AVR/inst-brts.s +++ b/llvm/test/MC/AVR/inst-brts.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brts (.Ltmp0+18)+2 ; encoding: [0bAAAAA110,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+18)+2, kind: fixup_7_pcrel -; CHECK: brts (.Ltmp1+22)+2 ; encoding: [0bAAAAA110,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+22)+2, kind: fixup_7_pcrel +; CHECK: brts .Ltmp0+18+2 ; encoding: [0bAAAAA110,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+18+2, kind: fixup_7_pcrel +; CHECK: brts .Ltmp1+22+2 ; encoding: [0bAAAAA110,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+22+2, kind: fixup_7_pcrel ; CHECK: brts bar ; encoding: [0bAAAAA110,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brvc.s b/llvm/test/MC/AVR/inst-brvc.s index 52b9f3b9b403c..766146cc57aaf 100644 --- a/llvm/test/MC/AVR/inst-brvc.s +++ b/llvm/test/MC/AVR/inst-brvc.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brvc (.Ltmp0-28)+2 ; encoding: [0bAAAAA011,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-28)+2, kind: fixup_7_pcrel -; CHECK: brvc (.Ltmp1-62)+2 ; encoding: [0bAAAAA011,0b111101AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: 
(.Ltmp1-62)+2, kind: fixup_7_pcrel +; CHECK: brvc .Ltmp0-28+2 ; encoding: [0bAAAAA011,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0-28+2, kind: fixup_7_pcrel +; CHECK: brvc .Ltmp1-62+2 ; encoding: [0bAAAAA011,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1-62+2, kind: fixup_7_pcrel ; CHECK: brvc bar ; encoding: [0bAAAAA011,0b111101AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-brvs.s b/llvm/test/MC/AVR/inst-brvs.s index 10382a8e6fd67..9ebe9c3181ee9 100644 --- a/llvm/test/MC/AVR/inst-brvs.s +++ b/llvm/test/MC/AVR/inst-brvs.s @@ -11,10 +11,10 @@ foo: bar: -; CHECK: brvs (.Ltmp0+18)+2 ; encoding: [0bAAAAA011,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+18)+2, kind: fixup_7_pcrel -; CHECK: brvs (.Ltmp1+32)+2 ; encoding: [0bAAAAA011,0b111100AA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+32)+2, kind: fixup_7_pcrel +; CHECK: brvs .Ltmp0+18+2 ; encoding: [0bAAAAA011,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+18+2, kind: fixup_7_pcrel +; CHECK: brvs .Ltmp1+32+2 ; encoding: [0bAAAAA011,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1+32+2, kind: fixup_7_pcrel ; CHECK: brvs bar ; encoding: [0bAAAAA011,0b111100AA] ; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel diff --git a/llvm/test/MC/AVR/inst-rcall.s b/llvm/test/MC/AVR/inst-rcall.s index 34c2ef86366c5..d0a9e6b7b0463 100644 --- a/llvm/test/MC/AVR/inst-rcall.s +++ b/llvm/test/MC/AVR/inst-rcall.s @@ -11,14 +11,14 @@ foo: rcall .+46 .short 0xdfea -; CHECK: rcall (.Ltmp0+0)+2 ; encoding: [A,0b1101AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+0)+2, kind: fixup_13_pcrel -; CHECK: rcall (.Ltmp1-8)+2 ; encoding: [A,0b1101AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-8)+2, kind: fixup_13_pcrel -; CHECK: rcall (.Ltmp2+12)+2 ; encoding: [A,0b1101AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+12)+2, kind: fixup_13_pcrel 
-; CHECK: rcall (.Ltmp3+46)+2 ; encoding: [A,0b1101AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+46)+2, kind: fixup_13_pcrel +; CHECK: rcall .Ltmp0+0+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+0+2, kind: fixup_13_pcrel +; CHECK: rcall .Ltmp1-8+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1-8+2, kind: fixup_13_pcrel +; CHECK: rcall .Ltmp2+12+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp2+12+2, kind: fixup_13_pcrel +; CHECK: rcall .Ltmp3+46+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp3+46+2, kind: fixup_13_pcrel ; INST-LABEL: : ; INST-NEXT: 00 d0 rcall .+0 diff --git a/llvm/test/MC/AVR/inst-rjmp.s b/llvm/test/MC/AVR/inst-rjmp.s index cf2a9d106f3d1..8971ff7ddcd8a 100644 --- a/llvm/test/MC/AVR/inst-rjmp.s +++ b/llvm/test/MC/AVR/inst-rjmp.s @@ -21,26 +21,26 @@ x: .short 0xc00f rjmp .+4094 -; CHECK: rjmp (.Ltmp0+2)+2 ; encoding: [A,0b1100AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp1-2)+2 ; encoding: [A,0b1100AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel +; CHECK: rjmp .Ltmp0+2+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp0+2+2, kind: fixup_13_pcrel +; CHECK: rjmp .Ltmp1-2+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp1-2+2, kind: fixup_13_pcrel ; CHECK: rjmp foo ; encoding: [A,0b1100AAAA] ; CHECK-NEXT: ; fixup A - offset: 0, value: foo, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp2+8)+2 ; encoding: [A,0b1100AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel +; CHECK: rjmp .Ltmp2+8+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp2+8+2, kind: fixup_13_pcrel ; CHECK: rjmp end ; encoding: [A,0b1100AAAA] ; CHECK-NEXT: ; fixup A - offset: 0, value: end, kind: fixup_13_pcrel -; CHECK: rjmp 
(.Ltmp3+0)+2 ; encoding: [A,0b1100AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp4-4)+2 ; encoding: [A,0b1100AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp5-6)+2 ; encoding: [A,0b1100AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel +; CHECK: rjmp .Ltmp3+0+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp3+0+2, kind: fixup_13_pcrel +; CHECK: rjmp .Ltmp4-4+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp4-4+2, kind: fixup_13_pcrel +; CHECK: rjmp .Ltmp5-6+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp5-6+2, kind: fixup_13_pcrel ; CHECK: rjmp x ; encoding: [A,0b1100AAAA] ; CHECK-NEXT: ; fixup A - offset: 0, value: x, kind: fixup_13_pcrel -; CHECK: rjmp (.Ltmp6+4094)+2 ; encoding: [A,0b1100AAAA] -; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp6+4094)+2, kind: fixup_13_pcrel +; CHECK: rjmp .Ltmp6+4094+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: .Ltmp6+4094+2, kind: fixup_13_pcrel ; INST-LABEL: : ; INST-NEXT: 01 c0 rjmp .+2 diff --git a/llvm/test/MC/AsmParser/directive_fill.s b/llvm/test/MC/AsmParser/directive_fill.s index a34112542b053..fd0a6056c9456 100644 --- a/llvm/test/MC/AsmParser/directive_fill.s +++ b/llvm/test/MC/AsmParser/directive_fill.s @@ -72,7 +72,7 @@ TEST12: .fill TEST11 - TEST12, 4, 0x12345678 # CHECK: TEST13 -# CHECK: .fill (TEST11-TEST12)+i, 4, 0x12345678 +# CHECK: .fill TEST11-TEST12+i, 4, 0x12345678 # OBJ-ERRS: [[@LINE+2]]:8: error: expected assembly-time absolute expression TEST13: .fill TEST11 - TEST12+i, 4, 0x12345678 diff --git a/llvm/test/MC/AsmParser/dollars-in-identifiers.s b/llvm/test/MC/AsmParser/dollars-in-identifiers.s index e56959062ad9d..2fd35535d356f 100644 --- a/llvm/test/MC/AsmParser/dollars-in-identifiers.s +++ 
b/llvm/test/MC/AsmParser/dollars-in-identifiers.s @@ -3,5 +3,5 @@ // CHECK: .globl $foo .globl $foo -// CHECK: .long ($foo) +// CHECK: .long $foo .long ($foo) diff --git a/llvm/test/MC/AsmParser/expr_symbol_modifiers.s b/llvm/test/MC/AsmParser/expr_symbol_modifiers.s index 7371c97cbdf4c..e465cf83d5ebe 100644 --- a/llvm/test/MC/AsmParser/expr_symbol_modifiers.s +++ b/llvm/test/MC/AsmParser/expr_symbol_modifiers.s @@ -10,5 +10,5 @@ .long a + 4@GOTPCREL // CHECK: .long a@GOTPCREL+b@GOTPCREL .long (a + b)@GOTPCREL -// CHECK: .long (10+b@GOTPCREL)+4 +// CHECK: .long 10+b@GOTPCREL+4 .long 10 + b + 4@GOTPCREL diff --git a/llvm/test/MC/COFF/cross-section-relative.ll b/llvm/test/MC/COFF/cross-section-relative.ll index 1b11a4659fee0..0f27aacd4915c 100644 --- a/llvm/test/MC/COFF/cross-section-relative.ll +++ b/llvm/test/MC/COFF/cross-section-relative.ll @@ -11,11 +11,11 @@ ;;;; cross-section relative relocations -; CHECK: .quad (g3-t1)+4 +; CHECK: .quad g3-t1+4 @t1 = global i64 add(i64 sub(i64 ptrtoint(ptr @g3 to i64), i64 ptrtoint(ptr @t1 to i64)), i64 4), section ".fix" ; CHECK: .quad g3-t2 @t2 = global i64 sub(i64 ptrtoint(ptr @g3 to i64), i64 ptrtoint(ptr @t2 to i64)), section ".fix" -; CHECK: .quad (g3-t3)-4 +; CHECK: .quad g3-t3-4 @t3 = global i64 sub(i64 sub(i64 ptrtoint(ptr @g3 to i64), i64 ptrtoint(ptr @t3 to i64)), i64 4), section ".fix" ; CHECK: .long g3-t4 @t4 = global i32 trunc(i64 sub(i64 ptrtoint(ptr @g3 to i64), i64 ptrtoint(ptr @t4 to i64)) to i32), section ".fix" @@ -32,7 +32,7 @@ %struct.EEType = type { [2 x i8], i64, i32} -; CHECK: .long (g3-t7)-16 +; CHECK: .long g3-t7-16 @t7 = global %struct.EEType { [2 x i8] c"\01\02", i64 256, diff --git a/llvm/test/MC/ELF/gnu-type-diagnostics.s b/llvm/test/MC/ELF/gnu-type-diagnostics.s index 23c144fee1db8..dbbfc5a173dec 100644 --- a/llvm/test/MC/ELF/gnu-type-diagnostics.s +++ b/llvm/test/MC/ELF/gnu-type-diagnostics.s @@ -15,4 +15,5 @@ // CHECK: .type symbol 32 // CHECK: ^ - +.section "foo", "a", !progbits +// CHECK: 
[[#@LINE-1]]:22: error: expected '@', '%' or "" diff --git a/llvm/test/MC/ELF/reloc-directive.s b/llvm/test/MC/ELF/reloc-directive.s index a4658f938d0d3..f4121ef071810 100644 --- a/llvm/test/MC/ELF/reloc-directive.s +++ b/llvm/test/MC/ELF/reloc-directive.s @@ -4,12 +4,12 @@ # RUN: llvm-readobj -r %t | FileCheck %s # ASM: .Ltmp0: -# ASM-NEXT: .reloc (.Ltmp0+3)-2, R_X86_64_NONE, foo +# ASM-NEXT: .reloc .Ltmp0+3-2, R_X86_64_NONE, foo # ASM-NEXT: .Ltmp1: # ASM-NEXT: .reloc .Ltmp1-1, R_X86_64_NONE, foo # ASM-NEXT: .Ltmp2: # ASM-NEXT: .reloc 2+.Ltmp2, R_X86_64_NONE, foo -# ASM-NEXT: .reloc (1+foo)+3, R_X86_64_NONE, data+1 +# ASM-NEXT: .reloc 1+foo+3, R_X86_64_NONE, data+1 # ASM-NEXT: .Ltmp3: # ASM-NEXT: .reloc .Ltmp3, BFD_RELOC_NONE, unused diff --git a/llvm/test/MC/Lanai/memory.s b/llvm/test/MC/Lanai/memory.s index 398cb8e123711..41dc8fba7bf29 100644 --- a/llvm/test/MC/Lanai/memory.s +++ b/llvm/test/MC/Lanai/memory.s @@ -239,9 +239,9 @@ mov hi(l+4), %r7 ! CHECK: encoding: [0x03,0x81,A,A] -! CHECK-NEXT: fixup A - offset: 0, value: (hi(l))+4, kind: FIXUP_LANAI_HI16{{$}} +! CHECK-NEXT: fixup A - offset: 0, value: hi(l)+4, kind: FIXUP_LANAI_HI16{{$}} ! CHECK-NEXT: ! CHECK-NEXT: -! CHECK-NEXT: +! 
CHECK-NEXT: diff --git a/llvm/test/MC/MachO/AArch64/cstexpr-gotpcrel.ll b/llvm/test/MC/MachO/AArch64/cstexpr-gotpcrel.ll index 3681ed5351839..53f43e68ac794 100644 --- a/llvm/test/MC/MachO/AArch64/cstexpr-gotpcrel.ll +++ b/llvm/test/MC/MachO/AArch64/cstexpr-gotpcrel.ll @@ -49,7 +49,7 @@ ; supported on x86-64 but not on ARM64 ; CHECK: .long 5 -; CHECK-NEXT: .long ((l_extgotequiv-_table)-44)+24 +; CHECK-NEXT: .long l_extgotequiv-_table-44+24 %struct.data { i32 4, %struct.anon { i32 5, i32 add (i32 trunc (i64 sub (i64 ptrtoint (ptr @extgotequiv to i64), i64 ptrtoint (ptr getelementptr inbounds ([4 x %struct.data], ptr @table, i32 0, i64 3, i32 1, i32 1) to i64)) @@ -67,7 +67,7 @@ to i32) ; CHECK-LABEL: _deltaplus: -; CHECK: .long (l_localgotequiv-_deltaplus)+55 +; CHECK: .long l_localgotequiv-_deltaplus+55 @deltaplus = global i32 add (i32 trunc (i64 sub (i64 ptrtoint (ptr @localgotequiv to i64), i64 ptrtoint (ptr @deltaplus to i64)) to i32), i32 55) diff --git a/llvm/test/MC/MachO/dollar-identifier.s b/llvm/test/MC/MachO/dollar-identifier.s index ca6993f7f4040..7eff63354b660 100644 --- a/llvm/test/MC/MachO/dollar-identifier.s +++ b/llvm/test/MC/MachO/dollar-identifier.s @@ -1,4 +1,4 @@ // RUN: llvm-mc -triple x86_64-apple-darwin10 %s | FileCheck %s .long $1 -// CHECK: .long ($1) +// CHECK: .long $1 diff --git a/llvm/test/MC/Mips/expansion-jal-sym-pic.s b/llvm/test/MC/Mips/expansion-jal-sym-pic.s index c7b5ccc1880bd..6f1b7c9d81b42 100644 --- a/llvm/test/MC/Mips/expansion-jal-sym-pic.s +++ b/llvm/test/MC/Mips/expansion-jal-sym-pic.s @@ -55,7 +55,7 @@ local_label: # O32: # fixup A - offset: 0, value: %got(local_label), kind: fixup_Mips_GOT # O32: addiu $25, $25, %lo(local_label) # encoding: [0x27,0x39,A,A] # O32: # fixup A - offset: 0, value: %lo(local_label), kind: fixup_Mips_LO16 -# O32-NEXT: .reloc ($tmp0), R_MIPS_JALR, local_label +# O32-NEXT: .reloc $tmp0, R_MIPS_JALR, local_label # ELF-O32: 8f 99 00 00 lw $25, 0($gp) # ELF-O32-NEXT: R_MIPS_GOT16 .text @@ -68,7 +68,7 
@@ local_label: # XO32-NEXT: # fixup A - offset: 0, value: %got(local_label), kind: fixup_Mips_GOT # XO32-NEXT: addiu $25, $25, %lo(local_label) # encoding: [0x27,0x39,A,A] # XO32-NEXT: # fixup A - offset: 0, value: %lo(local_label), kind: fixup_Mips_LO16 -# XO32-NEXT: .reloc ($tmp0), R_MIPS_JALR, local_label +# XO32-NEXT: .reloc $tmp0, R_MIPS_JALR, local_label # ELF-XO32: 8f 99 00 00 lw $25, 0($gp) # ELF-XO32-NEXT: R_MIPS_GOT16 .text @@ -117,7 +117,7 @@ local_label: # O32-MM: # fixup A - offset: 0, value: %got(local_label), kind: fixup_MICROMIPS_GOT16 # O32-MM: addiu $25, $25, %lo(local_label) # encoding: [0x33,0x39,A,A] # O32-MM: # fixup A - offset: 0, value: %lo(local_label), kind: fixup_MICROMIPS_LO16 -# O32-MM-NEXT: .reloc ($tmp0), R_MICROMIPS_JALR, local_label +# O32-MM-NEXT: .reloc $tmp0, R_MICROMIPS_JALR, local_label # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] @@ -212,7 +212,7 @@ local_label: # Expanding "jal weak_label": # O32: lw $25, %call16(weak_label)($gp) # encoding: [0x8f,0x99,A,A] # O32: # fixup A - offset: 0, value: %call16(weak_label), kind: fixup_Mips_CALL16 -# O32-NEXT: .reloc ($tmp1), R_MIPS_JALR, weak_label +# O32-NEXT: .reloc $tmp1, R_MIPS_JALR, weak_label # ELF-O32: 8f 99 00 00 lw $25, 0($gp) # ELF-O32-NEXT: R_MIPS_CALL16 weak_label @@ -224,7 +224,7 @@ local_label: # XO32-NEXT: addu $25, $25, $gp # encoding: [0x03,0x3c,0xc8,0x21] # XO32-NEXT: lw $25, %call_lo(weak_label)($25) # encoding: [0x8f,0x39,A,A] # XO32-NEXT: # fixup A - offset: 0, value: %call_lo(weak_label), kind: fixup_Mips_CALL_LO16 -# XO32-NEXT: .reloc ($tmp1), R_MIPS_JALR, weak_label +# XO32-NEXT: .reloc $tmp1, R_MIPS_JALR, weak_label # ELF-XO32: 3c 19 00 00 lui $25, 0 # ELF-XO32-MEXT: R_MIPS_CALL_HI16 weak_label @@ -284,7 +284,7 @@ local_label: # O32-MM: lw $25, %call16(weak_label)($gp) # encoding: [0xff,0x3c,A,A] # O32-MM: # fixup A - offset: 0, value: %call16(weak_label), kind: fixup_MICROMIPS_CALL16 -# O32-MM-NEXT: 
.reloc ($tmp1), R_MICROMIPS_JALR, weak_label +# O32-MM-NEXT: .reloc $tmp1, R_MICROMIPS_JALR, weak_label # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] @@ -392,7 +392,7 @@ local_label: # Expanding "jal global_label": # O32: lw $25, %call16(global_label)($gp) # encoding: [0x8f,0x99,A,A] # O32-NEXT: # fixup A - offset: 0, value: %call16(global_label), kind: fixup_Mips_CALL16 -# O32-NEXT: .reloc ($tmp2), R_MIPS_JALR, global_label +# O32-NEXT: .reloc $tmp2, R_MIPS_JALR, global_label # ELF-O32: 8f 99 00 00 lw $25, 0($gp) # ELF-O32-NEXT: R_MIPS_CALL16 global_label @@ -404,7 +404,7 @@ local_label: # XO32-NEXT: addu $25, $25, $gp # encoding: [0x03,0x3c,0xc8,0x21] # XO32-NEXT: lw $25, %call_lo(global_label)($25) # encoding: [0x8f,0x39,A,A] # XO32-NEXT: # fixup A - offset: 0, value: %call_lo(global_label), kind: fixup_Mips_CALL_LO16 -# XO32-NEXT: .reloc ($tmp2), R_MIPS_JALR, global_label +# XO32-NEXT: .reloc $tmp2, R_MIPS_JALR, global_label # ELF-XO32: 3c 19 00 00 lui $25, 0 # ELF-XO32-NEXT: R_MIPS_CALL_HI16 global_label @@ -464,7 +464,7 @@ local_label: # O32-MM: lw $25, %call16(global_label)($gp) # encoding: [0xff,0x3c,A,A] # O32-MM-NEXT: # fixup A - offset: 0, value: %call16(global_label), kind: fixup_MICROMIPS_CALL16 -# O32-MM-NEXT: .reloc ($tmp2), R_MICROMIPS_JALR, global_label +# O32-MM-NEXT: .reloc $tmp2, R_MICROMIPS_JALR, global_label # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] @@ -580,7 +580,7 @@ local_label: # XO32-NEXT: # fixup A - offset: 0, value: %got(.text), kind: fixup_Mips_GOT # XO32-NEXT: addiu $25, $25, %lo(.text) # encoding: [0x27,0x39,A,A] # XO32-NEXT: # fixup A - offset: 0, value: %lo(.text), kind: fixup_Mips_LO16 -# XO32-NEXT: .reloc ($tmp3), R_MIPS_JALR, .text +# XO32-NEXT: .reloc $tmp3, R_MIPS_JALR, .text # ELF-XO32: 8f 99 00 00 lw $25, 0($gp) # ELF-XO32-NEXT: R_MIPS_GOT16 .text @@ -623,7 +623,7 @@ local_label: # O32-MM-NEXT: # fixup A 
- offset: 0, value: %got(.text), kind: fixup_MICROMIPS_GOT16 # O32-MM-NEXT: addiu $25, $25, %lo(.text) # encoding: [0x33,0x39,A,A] # O32-MM-NEXT: # fixup A - offset: 0, value: %lo(.text), kind: fixup_MICROMIPS_LO16 -# O42-MM-NEXT: .reloc ($tmp3), R_MICROMIPS_JALR, .text +# O42-MM-NEXT: .reloc $tmp3, R_MICROMIPS_JALR, .text # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] @@ -689,7 +689,7 @@ local_label: # O32-MM-NEXT: # fixup A - offset: 0, value: %got(.text+8), kind: fixup_MICROMIPS_GOT16 # O32-MM-NEXT: addiu $25, $25, %lo(.text+8) # encoding: [0x33,0x39,A,A] # O32-MM-NEXT: # fixup A - offset: 0, value: %lo(.text+8), kind: fixup_MICROMIPS_LO16 -# O42-MM-NEXT: .reloc ($tmp4), R_MICROMIPS_JALR, .text +# O42-MM-NEXT: .reloc $tmp4, R_MICROMIPS_JALR, .text # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] @@ -704,7 +704,7 @@ local_label: # O32-NEXT: # fixup A - offset: 0, value: %got($tmp4), kind: fixup_Mips_GOT # O32-NEXT: addiu $25, $25, %lo($tmp4) # encoding: [0x27,0x39,A,A] # O32-NEXT: # fixup A - offset: 0, value: %lo($tmp4), kind: fixup_Mips_LO16 -# O32-NEXT: .reloc ($tmp5), R_MIPS_JALR, ($tmp4) +# O32-NEXT: .reloc $tmp5, R_MIPS_JALR, $tmp4 # ELF-O32: 8f 99 00 00 lw $25, 0($gp) # ELF-O32-NEXT: R_MIPS_GOT16 .text @@ -717,7 +717,7 @@ local_label: # XO32-NEXT: # fixup A - offset: 0, value: %got($tmp4), kind: fixup_Mips_GOT # XO32-NEXT: addiu $25, $25, %lo($tmp4) # encoding: [0x27,0x39,A,A] # XO32-NEXT: # fixup A - offset: 0, value: %lo($tmp4), kind: fixup_Mips_LO16 -# XO32-NEXT: .reloc ($tmp5), R_MIPS_JALR, ($tmp4) +# XO32-NEXT: .reloc $tmp5, R_MIPS_JALR, $tmp4 # ELF-XO32: 8f 99 00 00 lw $25, 0($gp) # ELF-XO32-NEXT: R_MIPS_GOT16 .text @@ -760,7 +760,7 @@ local_label: # O32-MM-NEXT: # fixup A - offset: 0, value: %got($tmp4), kind: fixup_MICROMIPS_GOT16 # O32-MM-NEXT: addiu $25, $25, %lo($tmp4) # encoding: [0x33,0x39,A,A] # O32-MM-NEXT: # fixup A - offset: 0, 
value: %lo($tmp4), kind: fixup_MICROMIPS_LO16 -# O32-MM-NEXT: .reloc ($tmp5), R_MICROMIPS_JALR, ($tmp4) +# O32-MM-NEXT: .reloc $tmp5, R_MICROMIPS_JALR, $tmp4 # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] @@ -769,10 +769,10 @@ local_label: jal 1f+8 nop -# O32: lw $25, %got(($tmp4)+8)($gp) # encoding: [0x8f,0x99,A,A] -# O32-NEXT: # fixup A - offset: 0, value: %got(($tmp4)+8), kind: fixup_Mips_GOT -# O32-NEXT: addiu $25, $25, %lo(($tmp4)+8) # encoding: [0x27,0x39,A,A] -# O32-NEXT: # fixup A - offset: 0, value: %lo(($tmp4)+8), kind: fixup_Mips_LO16 +# O32: lw $25, %got($tmp4+8)($gp) # encoding: [0x8f,0x99,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %got($tmp4+8), kind: fixup_Mips_GOT +# O32-NEXT: addiu $25, $25, %lo($tmp4+8) # encoding: [0x27,0x39,A,A] +# O32-NEXT: # fixup A - offset: 0, value: %lo($tmp4+8), kind: fixup_Mips_LO16 # O32-NOT: .reloc # ELF-O32: 8f 99 00 00 lw $25, 0($gp) @@ -782,10 +782,10 @@ local_label: # ELF-O32-NEXT: 03 20 f8 09 jalr $25 # ELF-O32-NEXT: 00 00 00 00 nop -# XO32: lw $25, %got(($tmp4)+8)($gp) # encoding: [0x8f,0x99,A,A] -# XO32-NEXT: # fixup A - offset: 0, value: %got(($tmp4)+8), kind: fixup_Mips_GOT -# XO32-NEXT: addiu $25, $25, %lo(($tmp4)+8) # encoding: [0x27,0x39,A,A] -# XO32-NEXT: # fixup A - offset: 0, value: %lo(($tmp4)+8), kind: fixup_Mips_LO16 +# XO32: lw $25, %got($tmp4+8)($gp) # encoding: [0x8f,0x99,A,A] +# XO32-NEXT: # fixup A - offset: 0, value: %got($tmp4+8), kind: fixup_Mips_GOT +# XO32-NEXT: addiu $25, $25, %lo($tmp4+8) # encoding: [0x27,0x39,A,A] +# XO32-NEXT: # fixup A - offset: 0, value: %lo($tmp4+8), kind: fixup_Mips_LO16 # XO32-NOT: .reloc # ELF-XO32: 8f 99 00 00 lw $25, 0($gp) @@ -829,10 +829,10 @@ local_label: # ELF-XN64-NEXT: 03 20 f8 09 jalr $25 # ELF-XN64-NEXT: R_MIPS_JALR/R_MIPS_NONE/R_MIPS_NONE .Ltmp0 -# O32-MM: lw $25, %got(($tmp4)+8)($gp) # encoding: [0xff,0x3c,A,A] -# O32-MM-NEXT: # fixup A - offset: 0, value: %got(($tmp4)+8), kind: 
fixup_MICROMIPS_GOT16 -# O32-MM-NEXT: addiu $25, $25, %lo(($tmp4)+8) # encoding: [0x33,0x39,A,A] -# O32-MM-NEXT: # fixup A - offset: 0, value: %lo(($tmp4)+8), kind: fixup_MICROMIPS_LO16 +# O32-MM: lw $25, %got($tmp4+8)($gp) # encoding: [0xff,0x3c,A,A] +# O32-MM-NEXT: # fixup A - offset: 0, value: %got($tmp4+8), kind: fixup_MICROMIPS_GOT16 +# O32-MM-NEXT: addiu $25, $25, %lo($tmp4+8) # encoding: [0x33,0x39,A,A] +# O32-MM-NEXT: # fixup A - offset: 0, value: %lo($tmp4+8), kind: fixup_MICROMIPS_LO16 # O32-MM-NOT: .reloc # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] @@ -848,7 +848,7 @@ local_label: # O32-FIXME: # fixup A - offset: 0, value: %got(forward_local), kind: fixup_Mips_GOT # O32-FIXME: addiu $25, $25, %lo(forward_local) # encoding: [0x27,0x39,A,A] # O32-FIXME:: # fixup A - offset: 0, value: %lo(forward_local), kind: fixup_Mips_LO16 -# O32-FIXME: .reloc ($tmp6), R_MIPS_JALR, forward_local +# O32-FIXME: .reloc $tmp6, R_MIPS_JALR, forward_local # ELF-O32: 8f 99 00 00 lw $25, 0($gp) # ELF-O32-NEXT: R_MIPS_GOT16 .text @@ -873,7 +873,7 @@ local_label: # O32-MM-FIXME: # fixup A - offset: 0, value: %got(forward_local), kind: fixup_MICROMIPS_GOT16 # O32-MM-FIXME: addiu $25, $25, %lo(forward_local) # encoding: [0x33,0x39,A,A] # O32-MM-FIXME: # fixup A - offset: 0, value: %lo(forward_local), kind: fixup_MICROMIPS_LO16 -# O32-MM-FIXME: .reloc ($tmp6), R_MIPS_JALR, forward_local +# O32-MM-FIXME: .reloc $tmp6, R_MIPS_JALR, forward_local # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] @@ -887,7 +887,7 @@ local_label: # O32-FIXME: # fixup A - offset: 0, value: %got(forward_local+8), kind: fixup_Mips_GOT # O32-FIXME: addiu $25, $25, %lo(forward_local+8) # encoding: [0x27,0x39,A,A] # O32-FIXME:: # fixup A - offset: 0, value: %lo(forward_local+8), kind: fixup_Mips_LO16 -# O32-FIXME: .reloc ($tmp7), R_MIPS_JALR, forward_local +# O32-FIXME: .reloc $tmp7, R_MIPS_JALR, forward_local # ELF-O32: 8f 99 00 00 lw $25, 0($gp) # 
ELF-O32-NEXT: R_MIPS_GOT16 .text @@ -912,7 +912,7 @@ local_label: # O32-MM-FIXME: # fixup A - offset: 0, value: %got(forward_local), kind: fixup_MICROMIPS_GOT16 # O32-MM-FIXME: addiu $25, $25, %lo(forward_local) # encoding: [0x33,0x39,A,A] # O32-MM-FIXME: # fixup A - offset: 0, value: %lo(forward_local), kind: fixup_MICROMIPS_LO16 -# O32-MM-FIXME: .reloc ($tmp6), R_MIPS_JALR, forward_local +# O32-MM-FIXME: .reloc $tmp6, R_MIPS_JALR, forward_local # MIPS: jalr $25 # encoding: [0x03,0x20,0xf8,0x09] # MM: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c] diff --git a/llvm/test/MC/Mips/expr1.s b/llvm/test/MC/Mips/expr1.s index 7293fc11b23bd..f707091bed7bd 100644 --- a/llvm/test/MC/Mips/expr1.s +++ b/llvm/test/MC/Mips/expr1.s @@ -9,11 +9,11 @@ # 32R2-EL: lw $4, %lo(foo)($4) # encoding: [A,A,0x84,0x8c] # 32R2-EL: # fixup A - offset: 0, value: %lo(foo), kind: fixup_Mips_LO16 # 32R2-EL: lw $4, 56($4) # encoding: [0x38,0x00,0x84,0x8c] -# 32R2-EL: lui $1, %hi(foo+(%lo(8))) # encoding: [A,A,0x01,0x3c] -# 32R2-EL: # fixup A - offset: 0, value: %hi(foo+(%lo(8))), kind: fixup_Mips_HI16 +# 32R2-EL: lui $1, %hi(foo+%lo(8)) # encoding: [A,A,0x01,0x3c] +# 32R2-EL: # fixup A - offset: 0, value: %hi(foo+%lo(8)), kind: fixup_Mips_HI16 # 32R2-EL: addu $1, $1, $4 # encoding: [0x21,0x08,0x24,0x00] -# 32R2-EL: lw $4, %lo(foo+(%lo(8)))($1) # encoding: [A,A,0x24,0x8c] -# 32R2-EL: # fixup A - offset: 0, value: %lo(foo+(%lo(8))), kind: fixup_Mips_LO16 +# 32R2-EL: lw $4, %lo(foo+%lo(8))($1) # encoding: [A,A,0x24,0x8c] +# 32R2-EL: # fixup A - offset: 0, value: %lo(foo+%lo(8)), kind: fixup_Mips_LO16 # 32R2-EL: lw $4, %lo(12+foo)($4) # encoding: [A,A,0x84,0x8c] # 32R2-EL: # fixup A - offset: 0, value: %lo(12+foo), kind: fixup_Mips_LO16 # 32R2-EL: lw $4, 10($4) # encoding: [0x0a,0x00,0x84,0x8c] @@ -27,11 +27,11 @@ # MM-32R2-EL: lw $4, %lo(foo)($4) # encoding: [0x84'A',0xfc'A',0x00,0x00] # MM-32R2-EL: # fixup A - offset: 0, value: %lo(foo), kind: fixup_MICROMIPS_LO16 # MM-32R2-EL: lw $4, 56($4) # 
encoding: [0x84,0xfc,0x38,0x00] -# MM-32R2-EL: lui $1, %hi(foo+(%lo(8))) # encoding: [0xa1'A',0x41'A',0x00,0x00] -# MM-32R2-EL: # fixup A - offset: 0, value: %hi(foo+(%lo(8))), kind: fixup_MICROMIPS_HI16 +# MM-32R2-EL: lui $1, %hi(foo+%lo(8)) # encoding: [0xa1'A',0x41'A',0x00,0x00] +# MM-32R2-EL: # fixup A - offset: 0, value: %hi(foo+%lo(8)), kind: fixup_MICROMIPS_HI16 # MM-32R2-EL: addu $1, $1, $4 # encoding: [0x81,0x00,0x50,0x09] -# MM-32R2-EL: lw $4, %lo(foo+(%lo(8)))($1) # encoding: [0x81'A',0xfc'A',0x00,0x00] -# MM-32R2-EL: # fixup A - offset: 0, value: %lo(foo+(%lo(8))), kind: fixup_MICROMIPS_LO16 +# MM-32R2-EL: lw $4, %lo(foo+%lo(8))($1) # encoding: [0x81'A',0xfc'A',0x00,0x00] +# MM-32R2-EL: # fixup A - offset: 0, value: %lo(foo+%lo(8)), kind: fixup_MICROMIPS_LO16 # MM-32R2-EL: lw $4, %lo(12+foo)($4) # encoding: [0x84'A',0xfc'A',0x00,0x00] # MM-32R2-EL: # fixup A - offset: 0, value: %lo(12+foo), kind: fixup_MICROMIPS_LO16 # MM-32R2-EL: lw $4, 10($4) # encoding: [0x84,0xfc,0x0a,0x00] diff --git a/llvm/test/MC/Mips/macro-div.s b/llvm/test/MC/Mips/macro-div.s index 8ce30d745bcf5..884618b667894 100644 --- a/llvm/test/MC/Mips/macro-div.s +++ b/llvm/test/MC/Mips/macro-div.s @@ -5,16 +5,16 @@ div $25,$11 # CHECK-NOTRAP: bnez $11, $tmp0 # encoding: [0x15,0x60,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp0)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp0-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: div $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1a] # CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d] # CHECK-NOTRAP: $tmp0: # CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff] # CHECK-NOTRAP: bne $11, $1, $tmp1 # encoding: [0x15,0x61,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp1)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp1-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00] # CHECK-NOTRAP: bne $25, $1, $tmp1 # encoding: 
[0x17,0x21,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp1)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp1-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: nop # encoding: [0x00,0x00,0x00,0x00] # CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d] # CHECK-NOTRAP: $tmp1: @@ -23,7 +23,7 @@ # CHECK-TRAP: div $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1a] # CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff] # CHECK-TRAP: bne $11, $1, $tmp0 # encoding: [0x15,0x61,A,A] -# CHECK-TRAP: # fixup A - offset: 0, value: ($tmp0)-4, kind: fixup_Mips_PC16 +# CHECK-TRAP: # fixup A - offset: 0, value: $tmp0-4, kind: fixup_Mips_PC16 # CHECK-TRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00] # CHECK-TRAP: teq $25, $1, 6 # encoding: [0x03,0x21,0x01,0xb4] # CHECK-TRAP: $tmp0: @@ -31,16 +31,16 @@ div $24,$12 # CHECK-NOTRAP: bnez $12, $tmp2 # encoding: [0x15,0x80,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp2)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp2-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: div $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1a] # CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d] # CHECK-NOTRAP: $tmp2: # CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff] # CHECK-NOTRAP: bne $12, $1, $tmp3 # encoding: [0x15,0x81,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp3)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp3-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00] # CHECK-NOTRAP: bne $24, $1, $tmp3 # encoding: [0x17,0x01,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp3)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp3-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: nop # encoding: [0x00,0x00,0x00,0x00] # CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d] # CHECK-NOTRAP: $tmp3: @@ -49,7 +49,7 @@ # CHECK-TRAP: div $zero, $24, $12 # encoding: 
[0x03,0x0c,0x00,0x1a] # CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff] # CHECK-TRAP: bne $12, $1, $tmp1 # encoding: [0x15,0x81,A,A] -# CHECK-TRAP: # fixup A - offset: 0, value: ($tmp1)-4, kind: fixup_Mips_PC16 +# CHECK-TRAP: # fixup A - offset: 0, value: $tmp1-4, kind: fixup_Mips_PC16 # CHECK-TRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00] # CHECK-TRAP: teq $24, $1, 6 # encoding: [0x03,0x01,0x01,0xb4] # CHECK-TRAP: $tmp1: @@ -127,16 +127,16 @@ div $4,$5,$6 # CHECK-NOTRAP: bnez $6, $tmp4 # encoding: [0x14,0xc0,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp4)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp4-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: div $zero, $5, $6 # encoding: [0x00,0xa6,0x00,0x1a] # CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d] # CHECK-NOTRAP: $tmp4: # CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff] # CHECK-NOTRAP: bne $6, $1, $tmp5 # encoding: [0x14,0xc1,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp5)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp5-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00] # CHECK-NOTRAP: bne $5, $1, $tmp5 # encoding: [0x14,0xa1,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp5)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp5-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: nop # encoding: [0x00,0x00,0x00,0x00] # CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d] # CHECK-NOTRAP: $tmp5: @@ -145,7 +145,7 @@ # CHECK-TRAP: div $zero, $5, $6 # encoding: [0x00,0xa6,0x00,0x1a] # CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff] # CHECK-TRAP: bne $6, $1, $tmp2 # encoding: [0x14,0xc1,A,A] -# CHECK-TRAP: # fixup A - offset: 0, value: ($tmp2)-4, kind: fixup_Mips_PC16 +# CHECK-TRAP: # fixup A - offset: 0, value: $tmp2-4, kind: fixup_Mips_PC16 # CHECK-TRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00] # 
CHECK-TRAP: teq $5, $1, 6 # encoding: [0x00,0xa1,0x01,0xb4] # CHECK-TRAP: $tmp2: diff --git a/llvm/test/MC/Mips/macro-divu.s b/llvm/test/MC/Mips/macro-divu.s index a3e8ae067c747..8b4b3ea4dbec2 100644 --- a/llvm/test/MC/Mips/macro-divu.s +++ b/llvm/test/MC/Mips/macro-divu.s @@ -5,7 +5,7 @@ divu $25,$11 # CHECK-NOTRAP: bnez $11, $tmp0 # encoding: [0x15,0x60,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp0)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp0-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: divu $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1b] # CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d] # CHECK-NOTRAP: $tmp0: @@ -13,7 +13,7 @@ divu $24,$12 # CHECK-NOTRAP: bnez $12, $tmp1 # encoding: [0x15,0x80,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp1)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp1-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: divu $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1b] # CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d] # CHECK-NOTRAP: $tmp1: @@ -30,7 +30,7 @@ divu $4,$5,$6 # CHECK-NOTRAP: bnez $6, $tmp2 # encoding: [0x14,0xc0,A,A] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp2)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp2-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: divu $zero, $5, $6 # encoding: [0x00,0xa6,0x00,0x1b] # CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d] # CHECK-NOTRAP: $tmp2: diff --git a/llvm/test/MC/Mips/macro-rem.s b/llvm/test/MC/Mips/macro-rem.s index 40812949664d6..a33c4a098ed69 100644 --- a/llvm/test/MC/Mips/macro-rem.s +++ b/llvm/test/MC/Mips/macro-rem.s @@ -5,16 +5,16 @@ rem $4,$5 # CHECK-NOTRAP: bnez $5, $tmp0 # encoding: [A,A,0xa0,0x14] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp0)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp0-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: div $zero, $4, $5 # encoding: [0x1a,0x00,0x85,0x00] # CHECK-NOTRAP: break 7 
# encoding: [0x0d,0x00,0x07,0x00] # CHECK-NOTRAP: $tmp0 # CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0xff,0xff,0x01,0x24] # CHECK-NOTRAP: bne $5, $1, $tmp1 # encoding: [A,A,0xa1,0x14] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp1)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp1-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: lui $1, 32768 # encoding: [0x00,0x80,0x01,0x3c] # CHECK-NOTRAP: bne $4, $1, $tmp1 # encoding: [A,A,0x81,0x14] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp1)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp1-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: nop # encoding: [0x00,0x00,0x00,0x00] # CHECK-NOTRAP: break 6 # encoding: [0x0d,0x00,0x06,0x00] # CHECK-NOTRAP: $tmp1 diff --git a/llvm/test/MC/Mips/macro-remu.s b/llvm/test/MC/Mips/macro-remu.s index 5e7b150e2105e..6520d17426419 100644 --- a/llvm/test/MC/Mips/macro-remu.s +++ b/llvm/test/MC/Mips/macro-remu.s @@ -5,7 +5,7 @@ remu $4,$5 # CHECK-NOTRAP: bnez $5, $tmp0 # encoding: [A,A,0xa0,0x14] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp0)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp0-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: divu $zero, $4, $5 # encoding: [0x1b,0x00,0x85,0x00] # CHECK-NOTRAP: break 7 # encoding: [0x0d,0x00,0x07,0x00] # CHECK-NOTRAP: mfhi $4 # encoding: [0x10,0x20,0x00,0x00] @@ -82,7 +82,7 @@ remu $4,$5,$6 # CHECK-NOTRAP: bnez $6, $tmp1 # encoding: [A,A,0xc0,0x14] -# CHECK-NOTRAP: # fixup A - offset: 0, value: ($tmp1)-4, kind: fixup_Mips_PC16 +# CHECK-NOTRAP: # fixup A - offset: 0, value: $tmp1-4, kind: fixup_Mips_PC16 # CHECK-NOTRAP: divu $zero, $5, $6 # encoding: [0x1b,0x00,0xa6,0x00] # CHECK-NOTRAP: break 7 # encoding: [0x0d,0x00,0x07,0x00] # CHECK-NOTRAP: $tmp1 diff --git a/llvm/test/MC/Mips/memory-offsets.s b/llvm/test/MC/Mips/memory-offsets.s index 895d8c6ba5fcf..7f5a59152eee6 100644 --- a/llvm/test/MC/Mips/memory-offsets.s +++ b/llvm/test/MC/Mips/memory-offsets.s @@ 
-25,9 +25,9 @@ __start: lw $31, (8 * 4) % (8 * 31)($29) # CHECK: lw $ra, 32($sp) # encoding: [0x8f,0xbf,0x00,0x20] lw $31, (8 * 4) % (8)($29) # CHECK: lw $ra, 0($sp) # encoding: [0x8f,0xbf,0x00,0x00] lw $31, (8 * 4) + (8 * 31) ($29) # CHECK: lw $ra, 280($sp) # encoding: [0x8f,0xbf,0x01,0x18] - lw $31, (8*4) + (8*31) + (8*32 + __start) ($29) # CHECK: lui $ra, %hi((248+((8*32)+__start))+32) # encoding: [0x3c,0x1f,A,A] - # CHECK: # fixup A - offset: 0, value: %hi((248+((8*32)+__start))+32), kind: fixup_Mips_HI16 + lw $31, (8*4) + (8*31) + (8*32 + __start) ($29) # CHECK: lui $ra, %hi(248+((8*32)+__start)+32) # encoding: [0x3c,0x1f,A,A] + # CHECK: # fixup A - offset: 0, value: %hi(248+((8*32)+__start)+32), kind: fixup_Mips_HI16 # CHECK: addu $ra, $ra, $sp # encoding: [0x03,0xfd,0xf8,0x21] - # CHECK: lw $ra, %lo((248+((8*32)+__start))+32)($ra) # encoding: [0x8f,0xff,A,A] - # CHECK: # fixup A - offset: 0, value: %lo((248+((8*32)+__start))+32), kind: fixup_Mips_LO16 + # CHECK: lw $ra, %lo(248+((8*32)+__start)+32)($ra) # encoding: [0x8f,0xff,A,A] + # CHECK: # fixup A - offset: 0, value: %lo(248+((8*32)+__start)+32), kind: fixup_Mips_LO16 .end __start diff --git a/llvm/test/MC/Mips/mips-fpu-instructions.s b/llvm/test/MC/Mips/mips-fpu-instructions.s index 733231afb793c..e740372a5e9f1 100644 --- a/llvm/test/MC/Mips/mips-fpu-instructions.s +++ b/llvm/test/MC/Mips/mips-fpu-instructions.s @@ -141,7 +141,7 @@ # FP move instructions #------------------------------------------------------------------------------ # CHECK: bc1f $BB_1 # encoding: [A,A,0x00,0x45] -# CHECK: # fixup A - offset: 0, value: ($BB_1)-4, kind: fixup_Mips_PC16 +# CHECK: # fixup A - offset: 0, value: $BB_1-4, kind: fixup_Mips_PC16 # CHECK: cfc1 $6, $0 # encoding: [0x00,0x00,0x46,0x44] # CHECK: ctc1 $10, $31 # encoding: [0x00,0xf8,0xca,0x44] diff --git a/llvm/test/MC/Mips/mips1/valid.s b/llvm/test/MC/Mips/mips1/valid.s index a67c93846ac98..95d4312845c34 100644 --- a/llvm/test/MC/Mips/mips1/valid.s +++ 
b/llvm/test/MC/Mips/mips1/valid.s @@ -52,7 +52,7 @@ a: # CHECK-NEXT: # %t | FileCheck %s --check-prefix=CHECK @@ -83,14 +83,6 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: ret void ; ; CHECK-VF8-LABEL: @main_vf_vscale_x_16( ; CHECK-VF8-NEXT: iter.check: @@ -144,14 +136,6 @@ define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF8: for.body: -; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-VF8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] -; CHECK-VF8-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 -; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF8-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK-VF8: exit: -; CHECK-VF8-NEXT: ret void ; entry: br label %for.body @@ -236,14 +220,6 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, 
[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: store i64 1, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: ret void ; ; CHECK-VF8-LABEL: @main_vf_vscale_x_2( ; CHECK-VF8-NEXT: iter.check: @@ -297,14 +273,6 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF8: for.body: -; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-VF8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; CHECK-VF8-NEXT: store i64 1, ptr [[ARRAYIDX]], align 1 -; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF8-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-VF8-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK-VF8: exit: -; CHECK-VF8-NEXT: ret void ; entry: br label %for.body @@ -391,15 +359,6 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 10000 -; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: ret void ; ; CHECK-VF8-LABEL: @test_pr57912_pointer_induction( ; CHECK-VF8-NEXT: iter.check: @@ -456,15 +415,6 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[LOOP:%.*]] ; CHECK-VF8: loop: -; CHECK-VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-VF8-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-VF8-NEXT: store i8 0, ptr [[PTR_IV]], align 1 -; CHECK-VF8-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1 -; CHECK-VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-VF8-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 10000 -; CHECK-VF8-NEXT: br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK-VF8: exit: -; CHECK-VF8-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index ef3d5b59632dc..cfb96b4f5a61f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes=loop-vectorize,dce -prefer-predicate-over-epilogue=scalar-epilogue \ ; RUN: -enable-epilogue-vectorization=false < %s -S | FileCheck %s @@ -59,22 +59,6 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP24:%.*]] = load half, ptr [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[FNEG:%.*]] = fneg half [[TMP24]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store half [[FNEG]], ptr [[ARRAYIDX2]], align 2 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: %cmp6 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll index d328124e6c783..663cf4173cc91 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes=loop-vectorize,dce,instcombine -mtriple aarch64-linux-gnu -mattr=+sve \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -force-target-instruction-cost=1 -o - | FileCheck %s @@ -31,21 +31,6 @@ define void @gather_nxv4i32_ind64(ptr noalias nocapture readonly %a, ptr noalias ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP10]], ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -99,22 +84,6 @@ define void @scatter_nxv4i32_ind32(ptr noalias nocapture %a, ptr noalias nocaptu ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], 
[[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IDXPROM4]] -; CHECK-NEXT: store float [[TMP10]], ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -165,23 +134,6 @@ define void @scatter_inv_nxv4i32(ptr noalias nocapture %inv, ptr noalias nocaptu ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; 
CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: store i32 3, ptr [[INV]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -236,24 +188,6 @@ define void @gather_inv_nxv4i32(ptr noalias nocapture %a, ptr noalias nocapture ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[TMP8]], 3 -; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[INV]], align 4 -; CHECK-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -323,20 +257,6 
@@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = shl i64 [[INDVARS_IV]], 3 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[ARRAYIDX_IDX]] -; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll index 0cd3870914283..2f90b5a332bdc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes=loop-vectorize,dce,instcombine -force-target-instruction-cost=1 \ ; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s -S | FileCheck %s @@ -45,25 +45,6 @@ define void @cond_ind64(ptr noalias nocapture %a, ptr noalias nocapture readonly ; CHECK-NEXT: [[CMP_N:%.*]] = 
icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[AND:%.*]] = and i64 [[I_08]], 1 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_08]] -; CHECK-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index c890cb71d34be..8bbda981895ac 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -mtriple=aarch64-none-linux-gnu -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -mattr=+sve 
-scalable-vectorization=on -runtime-memory-check-threshold=24 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" @@ -52,11 +52,6 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -139,11 +134,6 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -230,11 +220,6 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -308,12 +293,6 @@ define i32 @test_struct_load6(ptr %S) #1 { ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP16]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[SUB14_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 
[[SUB14_LCSSA]] ; entry: br label %for.body @@ -418,11 +397,6 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] ; entry: br label %for.body @@ -488,21 +462,6 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ poison, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[LOAD]], 1 -; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[LSHR]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV]], 1022 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP13:![0-9]+]] ; entry: br label %for.body @@ -574,21 +533,6 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], 
[[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[LOAD]], 1 -; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[LSHR]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP15:![0-9]+]] ; entry: br label %for.body @@ -660,11 +604,6 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -726,11 +665,6 @@ define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias n ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP19:![0-9]+]] ; entry: br label %for.body @@ -815,15 +749,6 @@ define void @int_float_struct(ptr nocapture readonly %p) #0 { ; CHECK-NEXT: 
[[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP6]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: store i32 [[ADD_LCSSA]], ptr @SA, align 4 -; CHECK-NEXT: store float [[ADD3_LCSSA]], ptr @SB, align 4 -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; entry: br label %for.body @@ -908,19 +833,6 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0 -; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_X]], align 4 -; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_Y]], align 4 -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP23:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -996,22 +908,6 @@ define i32 @PR27626_1(ptr %p, i64 %n) #1 { ; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP17]]) ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 
[[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP21:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0 -; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[P_I_X]], align 4 -; CHECK-NEXT: store i32 [[TMP20]], ptr [[P_I_Y]], align 4 -; CHECK-NEXT: [[TMP21]] = add nsw i32 [[TMP20]], [[S]] -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP25:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret i32 [[TMP21]] ; entry: br label %for.body @@ -1086,21 +982,6 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0 -; CHECK-NEXT: [[P_I_MINUS_1_X:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 -8 -; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_X]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[P_I_MINUS_1_X]], align 4 -; CHECK-NEXT: store i32 [[TMP17]], ptr [[P_I_Y]], align 4 -; CHECK-NEXT: [[I_NEXT]] = 
add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP27:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -1178,25 +1059,6 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP18]]) ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[TMP23:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[I_PLUS_1:%.*]] = add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0 -; CHECK-NEXT: [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: [[P_I_PLUS_1_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I_PLUS_1]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[P_I_X]], align 4 -; CHECK-NEXT: store i32 [[TMP21]], ptr [[P_I_PLUS_1_Y]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_I_Y]], align 4 -; CHECK-NEXT: [[TMP23]] = add nsw i32 [[TMP22]], [[S]] -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP29:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret i32 [[TMP23]] ; entry: br label %for.body @@ -1279,20 +1141,6 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], 
[[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[I_PLUS_1:%.*]] = or disjoint i64 [[I]], 1 -; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[A_I_PLUS_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_PLUS_1]] -; CHECK-NEXT: store i32 [[Y]], ptr [[A_I]], align 4 -; CHECK-NEXT: store i32 [[Z]], ptr [[A_I_PLUS_1]], align 4 -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP31:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -1375,23 +1223,6 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[A_I_MINUS_1:%.*]] = getelementptr i8, ptr [[TMP19]], i64 -4 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]] -; CHECK-NEXT: [[A_I_MINUS_3:%.*]] = getelementptr i8, ptr [[TMP20]], i64 -12 -; CHECK-NEXT: store i32 [[X]], ptr [[A_I_MINUS_1]], align 4 -; CHECK-NEXT: store i32 [[Y]], ptr 
[[A_I_MINUS_3]], align 4 -; CHECK-NEXT: store i32 [[Z]], ptr [[A_I]], align 4 -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 -; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP33:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -1492,32 +1323,6 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[TMP33:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV2:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP33]] to i32 -; CHECK-NEXT: [[I1]] = add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: [[IV1:%.*]] = or disjoint i64 [[IV]], 1 -; CHECK-NEXT: [[IV2]] = add nuw nsw i64 [[IV]], 2 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IV1]] -; CHECK-NEXT: [[LOAD1:%.*]] = load i16, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[LOAD1]] to i32 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IV2]] -; CHECK-NEXT: [[LOAD2]] = load i16, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[LOAD2]] to i32 -; CHECK-NEXT: 
[[MUL01:%.*]] = mul nsw i32 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[MUL012:%.*]] = mul nsw i32 [[MUL01]], [[CONV2]] -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]] -; CHECK-NEXT: store i32 [[MUL012]], ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]], !llvm.loop [[LOOP40:![0-9]+]] -; CHECK: end: -; CHECK-NEXT: ret void ; entry: %.pre = load i16, ptr %a @@ -1613,43 +1418,6 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [[STRUCT_XYZT]], ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw [[STRUCT_XYZT]], ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP26]] -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw [[STRUCT_XYZT]], ptr [[DST]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[Y]], align 4 -; CHECK-NEXT: [[Y11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 4 -; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[Y11]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], [[TMP29]] -; CHECK-NEXT: 
[[Y14:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 4 -; CHECK-NEXT: store i32 [[SUB]], ptr [[Y14]], align 4 -; CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 8 -; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[Z]], align 4 -; CHECK-NEXT: [[Z19:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 8 -; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[Z19]], align 4 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[TMP30]], [[TMP31]] -; CHECK-NEXT: [[Z22:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 8 -; CHECK-NEXT: store i32 [[SHL]], ptr [[Z22]], align 4 -; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 -; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[T]], align 4 -; CHECK-NEXT: [[T27:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX2]], i64 12 -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[T27]], align 4 -; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[TMP32]], [[TMP33]] -; CHECK-NEXT: [[T30:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX5]], i64 12 -; CHECK-NEXT: store i32 [[SHR]], ptr [[T30]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -1768,11 +1536,6 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A, ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP44:![0-9]+]] ; entry: br label %for.body diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index f2e0c9be2defe..9d6b691f3ed31 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" --version 2 ; RUN: opt -mtriple=aarch64-none-linux-gnu -S -passes=loop-vectorize,instcombine -mattr=+sve -force-vector-width=16 -force-vector-interleave=1 -scalable-vectorization=on -prefer-predicate-over-epilogue=scalar-epilogue %s 2>&1 | FileCheck %s -check-prefix=SCALAR_TAIL_FOLDING ; RUN: opt -mtriple=aarch64-none-linux-gnu -S -passes=loop-vectorize,instcombine -mattr=+sve -force-vector-width=16 -force-vector-interleave=1 -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize %s 2>&1 | FileCheck %s -check-prefix=PREDICATED_TAIL_FOLDING @@ -70,36 +70,6 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALAR_TAIL_FOLDING: scalar.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; SCALAR_TAIL_FOLDING: for.body: -; SCALAR_TAIL_FOLDING-NEXT: [[IX_024:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[IX_024]], [[CONV]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: 
if.then: -; SCALAR_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_024]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = zext nneg i32 [[MUL]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP18]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP20]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[SPEC_SELECT_I:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP19]], i8 [[TMP21]]) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = zext nneg i32 [[MUL]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP22]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SPEC_SELECT_I]], ptr [[ARRAYIDX6]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[SUB:%.*]] = sub i8 0, [[SPEC_SELECT_I]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP23]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SUB]], ptr [[ARRAYIDX11]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: for.inc: -; SCALAR_TAIL_FOLDING-NEXT: [[INC]] = add nuw nsw i32 [[IX_024]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; SCALAR_TAIL_FOLDING: for.end: -; SCALAR_TAIL_FOLDING-NEXT: ret void ; ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided1 ; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] { @@ -148,15 +118,6 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; PREDICATED_TAIL_FOLDING: scalar.ph: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; PREDICATED_TAIL_FOLDING: for.body: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; PREDICATED_TAIL_FOLDING: if.then: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; PREDICATED_TAIL_FOLDING: for.inc: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; PREDICATED_TAIL_FOLDING: for.end: -; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 @@ -247,28 +208,6 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALAR_TAIL_FOLDING: scalar.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; SCALAR_TAIL_FOLDING: for.body: -; SCALAR_TAIL_FOLDING-NEXT: [[IX_012:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_012]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = zext nneg i32 [[MUL]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP15]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[IX_012]], [[CONV]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: 
if.then: -; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP16]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX3]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: for.inc: -; SCALAR_TAIL_FOLDING-NEXT: [[INC]] = add nuw nsw i32 [[IX_012]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; SCALAR_TAIL_FOLDING: for.end: -; SCALAR_TAIL_FOLDING-NEXT: ret void ; ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided2 ; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias readnone captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) local_unnamed_addr #[[ATTR0]] { @@ -310,15 +249,6 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; PREDICATED_TAIL_FOLDING: scalar.ph: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; PREDICATED_TAIL_FOLDING: for.body: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] -; PREDICATED_TAIL_FOLDING: if.then: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; PREDICATED_TAIL_FOLDING: for.inc: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; PREDICATED_TAIL_FOLDING: for.end: -; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 @@ -408,33 +338,6 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; 
SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALAR_TAIL_FOLDING: scalar.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; SCALAR_TAIL_FOLDING: for.body: -; SCALAR_TAIL_FOLDING-NEXT: [[IX_018:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[IX_018]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[IX_018]], [[CONV]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; SCALAR_TAIL_FOLDING: if.then: -; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = zext nneg i32 [[MUL]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP16]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: br label [[IF_END]] -; SCALAR_TAIL_FOLDING: if.end: -; SCALAR_TAIL_FOLDING-NEXT: [[CMP4:%.*]] = icmp samesign ugt i32 [[IX_018]], [[CONV3]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP4]], label [[IF_THEN6:%.*]], label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: if.then6: -; SCALAR_TAIL_FOLDING-NEXT: [[ADD:%.*]] = or disjoint i32 [[MUL]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = zext nneg i32 [[ADD]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP17]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 2, ptr [[ARRAYIDX7]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: for.inc: -; SCALAR_TAIL_FOLDING-NEXT: [[INC]] = add nuw nsw i32 [[IX_018]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; SCALAR_TAIL_FOLDING: for.end: -; SCALAR_TAIL_FOLDING-NEXT: ret void ; 
; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided3 ; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias readnone captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 zeroext [[GUARD1:%.*]], i8 zeroext [[GUARD2:%.*]]) local_unnamed_addr #[[ATTR0]] { @@ -481,19 +384,6 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; PREDICATED_TAIL_FOLDING: scalar.ph: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; PREDICATED_TAIL_FOLDING: for.body: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; PREDICATED_TAIL_FOLDING: if.then: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[IF_END]] -; PREDICATED_TAIL_FOLDING: if.end: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN6:%.*]], label [[FOR_INC:%.*]] -; PREDICATED_TAIL_FOLDING: if.then6: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; PREDICATED_TAIL_FOLDING: for.inc: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; PREDICATED_TAIL_FOLDING: for.end: -; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard1 to i32 @@ -613,52 +503,6 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALAR_TAIL_FOLDING: scalar.ph: -; SCALAR_TAIL_FOLDING-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; SCALAR_TAIL_FOLDING: for.body: -; SCALAR_TAIL_FOLDING-NEXT: [[IX_024:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; SCALAR_TAIL_FOLDING-NEXT: [[CMP1:%.*]] = icmp samesign 
ugt i32 [[IX_024]], [[CONV]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: if.then: -; SCALAR_TAIL_FOLDING-NEXT: [[IDX0:%.*]] = shl nuw nsw i32 [[IX_024]], 2 -; SCALAR_TAIL_FOLDING-NEXT: [[IDX1:%.*]] = or disjoint i32 [[IDX0]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[IDX2:%.*]] = or disjoint i32 [[IDX0]], 2 -; SCALAR_TAIL_FOLDING-NEXT: [[IDX3:%.*]] = or disjoint i32 [[IDX0]], 3 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP24:%.*]] = zext nneg i32 [[IDX0]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY1IDX0:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP24]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAY1IDX0]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP26:%.*]] = zext nneg i32 [[IDX1]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY1IDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP26]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAY1IDX1]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP28:%.*]] = zext nneg i32 [[IDX2]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY1IDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP28]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP29:%.*]] = load i8, ptr [[ARRAY1IDX2]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP30:%.*]] = zext nneg i32 [[IDX3]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY1IDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 [[TMP30]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP31:%.*]] = load i8, ptr [[ARRAY1IDX3]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[SPEC_SELECT_I1:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP25]], i8 [[TMP27]]) -; SCALAR_TAIL_FOLDING-NEXT: [[SUB1:%.*]] = sub i8 0, [[SPEC_SELECT_I1]] -; SCALAR_TAIL_FOLDING-NEXT: [[SPEC_SELECT_I2:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP29]], i8 [[TMP31]]) -; SCALAR_TAIL_FOLDING-NEXT: [[SUB2:%.*]] = sub i8 0, [[SPEC_SELECT_I2]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP32:%.*]] = zext nneg i32 [[IDX0]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY3IDX0:%.*]] = getelementptr inbounds nuw i8, 
ptr [[Q]], i64 [[TMP32]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SPEC_SELECT_I1]], ptr [[ARRAY3IDX0]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP33:%.*]] = zext nneg i32 [[IDX1]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY3IDX1:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP33]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SUB1]], ptr [[ARRAY3IDX1]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP34:%.*]] = zext nneg i32 [[IDX2]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY3IDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP34]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SPEC_SELECT_I2]], ptr [[ARRAY3IDX2]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP35:%.*]] = zext nneg i32 [[IDX3]] to i64 -; SCALAR_TAIL_FOLDING-NEXT: [[ARRAY3IDX3:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP35]] -; SCALAR_TAIL_FOLDING-NEXT: store i8 [[SUB2]], ptr [[ARRAY3IDX3]], align 1 -; SCALAR_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; SCALAR_TAIL_FOLDING: for.inc: -; SCALAR_TAIL_FOLDING-NEXT: [[INC]] = add nuw nsw i32 [[IX_024]], 1 -; SCALAR_TAIL_FOLDING-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] -; SCALAR_TAIL_FOLDING: for.end: -; SCALAR_TAIL_FOLDING-NEXT: ret void ; ; PREDICATED_TAIL_FOLDING-LABEL: define dso_local void @masked_strided_factor4 ; PREDICATED_TAIL_FOLDING-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i8 zeroext [[GUARD:%.*]]) local_unnamed_addr #[[ATTR0]] { @@ -721,15 +565,6 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING: middle.block: ; PREDICATED_TAIL_FOLDING-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; PREDICATED_TAIL_FOLDING: scalar.ph: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_BODY:%.*]] -; PREDICATED_TAIL_FOLDING: for.body: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[IF_THEN:%.*]], 
label [[FOR_INC:%.*]] -; PREDICATED_TAIL_FOLDING: if.then: -; PREDICATED_TAIL_FOLDING-NEXT: br label [[FOR_INC]] -; PREDICATED_TAIL_FOLDING: for.inc: -; PREDICATED_TAIL_FOLDING-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] -; PREDICATED_TAIL_FOLDING: for.end: -; PREDICATED_TAIL_FOLDING-NEXT: ret void ; entry: %conv = zext i8 %guard to i32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll index 4136a9f9e7938..8b009f1c91373 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes=loop-vectorize -S -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" @@ -35,18 +35,6 @@ define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC24:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY14:%.*]] -; CHECK: for.body14: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY14]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[LD:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -; CHECK-NEXT: store i16 [[LD]], ptr [[DST]], align 2 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], 
label [[FOR_INC24]], label [[FOR_BODY14]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.inc24: -; CHECK-NEXT: ret void ; entry: br label %for.body14 @@ -96,23 +84,6 @@ define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_09:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[I_09]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; CHECK: if.then: -; CHECK-NEXT: store i32 [[TMP11]], ptr [[DST]], align 4 -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_09]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index 846e9e6e82da5..bc4533f3011cb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes=loop-vectorize < %s -S -o - | FileCheck %s target datalayout = 
"e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -58,22 +58,6 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[COND_0:%.*]] = icmp eq i32 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[COND_0]], label [[EXIT:%.*]], label [[FOR_BODY]] -; CHECK: for.body: -; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]] -; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[A_GEP]], align 4 -; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]] -; CHECK-NEXT: store i32 [[LV]], ptr [[B_GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[COND_1:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND_1]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: ret void ; entry: br label %loop.header @@ -148,24 +132,6 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] -; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] -; CHECK-NEXT: [[COND_0:%.*]] = icmp eq i32 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[COND_0]], label [[EXIT_0:%.*]], label [[FOR_BODY]] -; CHECK: for.body: -; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]] -; CHECK-NEXT: [[LV:%.*]] = 
load i32, ptr [[A_GEP]], align 4 -; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]] -; CHECK-NEXT: store i32 [[LV]], ptr [[B_GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 -; CHECK-NEXT: [[COND_1:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[COND_1]], label [[LOOP_HEADER]], label [[EXIT_1:%.*]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: exit.0: -; CHECK-NEXT: ret i32 1 -; CHECK: exit.1: -; CHECK-NEXT: ret i32 2 ; entry: br label %loop.header diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index 6affb8ca8e7bd..2749b47325cbe 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes=loop-vectorize -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue -S %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" @@ -89,24 +89,6 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[IV]] -; CHECK-NEXT: 
[[L_1:%.*]] = load i64, ptr [[GEP_SRC_1]], align 8 -; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[GEP_SRC_2]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[L_1]], [[L_2]] -; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[IV]] -; CHECK-NEXT: [[GEP_DST_2:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[IV]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[GEP_DST_1]], align 8 -; CHECK-NEXT: store i64 [[ADD]], ptr [[GEP_DST_2]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll index 95aad199ac765..25403599977cb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; REQUIRES: asserts ; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize < %s 2>%t | FileCheck %s ; RUN: cat %t | FileCheck %s --check-prefix=VPLANS @@ -77,17 +77,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 
[[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index 3f4caeca5d452..63bb485e7f085 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" @@ -38,22 +38,6 @@ define void @trip1025_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapt ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_06:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[I_06]] -; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[TMP16]], 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[I_06]] -; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP17]], [[MUL]] -; CHECK-NEXT: store i64 [[ADD]], ptr 
[[ARRAYIDX1]], align 8 -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_06]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll index f61d473a12ddb..b39c47cc7906d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -passes='loop-vectorize,instcombine' -sve-tail-folding-insn-threshold=0 -sve-tail-folding=all -S < %s | FileCheck %s target triple = "aarch64" @@ -34,13 +34,6 @@ define void @cannot_overflow_i32_induction_var(ptr noalias %dst, ptr readonly %s ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void ; entry: %cmp6.not = icmp eq i32 %N, 0 @@ -98,13 +91,6 @@ define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label 
[[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void ; entry: %cmp6.not = icmp eq i64 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll index f4c9d783a3329..ce761913ea0fc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-unroll.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -S -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-interleave=4 -force-vector-width=4 < %s | FileCheck %s target triple = "aarch64-unknown-linux-gnu" @@ -80,17 +80,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -206,24 +195,6 @@ define void 
@cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias % ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[COND_GEP:%.*]] = getelementptr i32, ptr [[COND_PTR]], i64 [[INDEX]] -; CHECK-NEXT: [[COND_I32:%.*]] = load i32, ptr [[COND_GEP]], align 4 -; CHECK-NEXT: [[COND_I1:%.*]] = icmp ne i32 [[COND_I32]], 0 -; CHECK-NEXT: br i1 [[COND_I1]], label [[DO_STORE:%.*]], label [[WHILE_END]] -; CHECK: do.store: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: br label [[WHILE_END]] -; CHECK: while.end: -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 314ae92c45240..aab4f33f87c0f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; RUN: opt -S -hints-allow-reordering=false -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue < %s | FileCheck %s target triple = 
"aarch64-unknown-linux-gnu" @@ -41,17 +41,6 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -99,17 +88,6 @@ define void @simple_memset_v4i32(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -165,19 +143,6 @@ define void @simple_memcpy(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { 
; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -245,19 +210,6 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 4 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop 
[[LOOP9:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -316,21 +268,6 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[IND]], i64 [[INDEX]] -; CHECK-NEXT: [[IND_VAL:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[SRC]], i32 [[IND_VAL]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i32, ptr [[DST]], i32 [[IND_VAL]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP3]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -391,18 +328,6 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) # ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]] 
-; CHECK-NEXT: store i32 [[VAL]], ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: @@ -470,26 +395,6 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP20]], 0 -; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[VAL_0:%.*]] = phi i32 [ [[TMP21]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL_0]], ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: @@ -558,18 +463,6 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n) ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label 
[[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: store i32 [[VAL]], ptr [[DST]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] -; CHECK: for.end: -; CHECK-NEXT: ret void ; entry: @@ -629,21 +522,6 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL1:%.*]] = load float, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[VAL2:%.*]] = load float, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[RES:%.*]] = fdiv float [[VAL1]], [[VAL2]] -; CHECK-NEXT: store float [[RES]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP19:![0-9]+]] -; CHECK: 
while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -707,21 +585,6 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr [[GEP1]], align 4 -; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[GEP2]], align 4 -; CHECK-NEXT: [[RES:%.*]] = udiv i32 [[VAL1]], [[VAL2]] -; CHECK-NEXT: store i32 [[RES]], ptr [[GEP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP21:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body @@ -770,17 +633,6 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[WHILE_BODY:%.*]] -; CHECK: while.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 
[[INDEX]], 1 -; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP23:![0-9]+]] -; CHECK: while.end.loopexit: -; CHECK-NEXT: ret void ; entry: br label %while.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 9567123ab8ae3..592dc1c4efd47 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" ; This is the loop in c++ being vectorize in this file with ;vector.reverse ; #pragma clang loop vectorize_width(8, scalable) interleave_count(2) @@ -58,22 +58,6 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_MOD_VF]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr 
[[A]], i64 [[I_08]] -; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] ; entry: %cmp7 = icmp sgt i64 %N, 0 @@ -153,22 +137,6 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_MOD_VF]], [[MIDDLE_BLOCK]] ], [ [[N]], [[FOR_BODY_PREHEADER]] ], [ [[N]], [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.cond.cleanup.loopexit: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void -; CHECK: for.body: -; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]] -; CHECK-NEXT: [[TMP29:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP29]], 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]] -; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]] ; entry: %cmp8 = icmp sgt i64 %N, 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll index 84fc963833cf2..54ba0a8c4d6bc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-epilogue.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^for.body:" --version 3 ; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -epilogue-vectorization-minimum-VF=4 -debug-only=loop-vectorize -force-vector-interleave=1 -S 2>&1 | FileCheck %s ; REQUIRES: asserts @@ -69,19 +69,6 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY1]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll index c430e72cea703..b292e43046731 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" --version 3 ; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -max-dependences=2 -debug-only=loop-vectorize,loop-accesses -S 2>&1 | FileCheck %s ; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -debug-only=loop-vectorize,loop-accesses -S 2>&1 | FileCheck %s --check-prefix=NORMAL_DEP_LIMIT ; REQUIRES: asserts @@ -99,29 +99,6 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe ; NORMAL_DEP_LIMIT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; NORMAL_DEP_LIMIT-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; NORMAL_DEP_LIMIT: scalar.ph: -; NORMAL_DEP_LIMIT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; NORMAL_DEP_LIMIT-NEXT: br label [[FOR_BODY1:%.*]] -; NORMAL_DEP_LIMIT: for.body: -; NORMAL_DEP_LIMIT-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] -; NORMAL_DEP_LIMIT-NEXT: [[GEP_INDICES1:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]] -; NORMAL_DEP_LIMIT-NEXT: [[L_IDX:%.*]] = load i32, ptr [[GEP_INDICES1]], align 4 -; NORMAL_DEP_LIMIT-NEXT: [[IDXPROM1:%.*]] = zext i32 [[L_IDX]] to i64 -; NORMAL_DEP_LIMIT-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; NORMAL_DEP_LIMIT-NEXT: [[L_BUCKET:%.*]] = load i32, ptr [[GEP_BUCKET]], align 4 -; NORMAL_DEP_LIMIT-NEXT: [[INC:%.*]] = add 
nsw i32 [[L_BUCKET]], 1 -; NORMAL_DEP_LIMIT-NEXT: store i32 [[INC]], ptr [[GEP_BUCKET]], align 4 -; NORMAL_DEP_LIMIT-NEXT: [[IDX_ADDR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IV1]] -; NORMAL_DEP_LIMIT-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV1]] to i32 -; NORMAL_DEP_LIMIT-NEXT: store i32 [[IV_TRUNC]], ptr [[IDX_ADDR]], align 4 -; NORMAL_DEP_LIMIT-NEXT: [[GEP_OTHER:%.*]] = getelementptr inbounds i32, ptr [[OTHER]], i64 [[IV1]] -; NORMAL_DEP_LIMIT-NEXT: [[L_OTHER:%.*]] = load i32, ptr [[GEP_OTHER]], align 4 -; NORMAL_DEP_LIMIT-NEXT: [[ADD_OTHER:%.*]] = add i32 [[L_OTHER]], [[IV_TRUNC]] -; NORMAL_DEP_LIMIT-NEXT: store i32 [[ADD_OTHER]], ptr [[GEP_OTHER]], align 4 -; NORMAL_DEP_LIMIT-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 -; NORMAL_DEP_LIMIT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]] -; NORMAL_DEP_LIMIT-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP11:![0-9]+]] -; NORMAL_DEP_LIMIT: for.exit: -; NORMAL_DEP_LIMIT-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 3b00312959d8a..dd1f77582e0be 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^scalar.ph:" --version 3 ; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s ; REQUIRES: asserts @@ -56,22 +56,6 @@ define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 % ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label 
[[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -122,22 +106,6 @@ define void @simple_histogram_inc_param(ptr noalias %buckets, ptr readonly %indi ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], [[INCVAL]] -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -188,22 +156,6 @@ define void @simple_histogram_sub(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], -1 -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -257,29 +209,6 @@ define void @conditional_histogram(ptr noalias %buckets, ptr readonly %indices, ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label 
[[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY1:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[NEXT:%.*]] ] -; CHECK-NEXT: [[CONDIDX:%.*]] = getelementptr inbounds i32, ptr [[CONDS]], i64 [[IV1]] -; CHECK-NEXT: [[CONDDATA:%.*]] = load i32, ptr [[CONDIDX]], align 4 -; CHECK-NEXT: [[IFCOND:%.*]] = icmp sgt i32 [[CONDDATA]], 5100 -; CHECK-NEXT: br i1 [[IFCOND]], label [[IFTRUE:%.*]], label [[NEXT]] -; CHECK: iftrue: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX3]], align 4 -; CHECK-NEXT: br label [[NEXT]] -; CHECK: next: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -339,22 +268,6 @@ define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY1:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] -; 
CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_INDICES]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i8 [[TMP1]], 1 -; CHECK-NEXT: store i8 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -499,22 +412,6 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], 
label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -569,22 +466,6 @@ define void @histogram_array_3op_gep(i64 noundef %N) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1048576 x i32], ptr @idx_array, i64 0, i64 [[IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1048576 x i32], ptr @data_array, i64 0, i64 [[IDXPROM5]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -639,22 +520,6 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY1:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] -; CHECK-NEXT: 
[[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]] -; CHECK-NEXT: [[L_IDX:%.*]] = load i32, ptr [[GEP_INDICES]], align 4 -; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[L_IDX]] to i64 -; CHECK-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds [[SOMESTRUCT]], ptr [[DATA_STRUCT]], i64 1, i32 0, i64 [[IDXPROM5]] -; CHECK-NEXT: [[L_BUCKET:%.*]] = load i32, ptr [[GEP_BUCKET]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[L_BUCKET]], 1 -; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_BUCKET]], align 4 -; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -705,11 +570,6 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: br i1 poison, label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -779,25 +639,6 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; 
CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP17]] to i64 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[BUCKETS]], i64 [[IDXPROM1]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1 -; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[IDX_ADDR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[IV]] -; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[IDX_ADDR]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body @@ -892,21 +733,6 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i64, ptr [[INDICES]], i64 [[IV]] -; CHECK-NEXT: [[L_IDX:%.*]] = load i64, ptr [[GEP_INDICES]], align 4 -; CHECK-NEXT: [[GEP_BUCKET:%.*]] = getelementptr inbounds i64, ptr [[BUCKETS]], i64 [[L_IDX]] -; CHECK-NEXT: [[L_BUCKET:%.*]] = load i64, ptr [[GEP_BUCKET]], align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i64 [[L_BUCKET]], 1 -; CHECK-NEXT: store i64 [[INC]], ptr [[GEP_BUCKET]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], 
label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] -; CHECK: for.exit: -; CHECK-NEXT: ret void ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll index de4b265b155b6..e29b15b8991e0 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll @@ -55,16 +55,12 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 ; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer -; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) -; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF8UF2: [[MIDDLE_SPLIT]]: ; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: @@ -83,7 +79,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF8UF2: [[LOOP_LATCH]]: ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 
[[IV_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] ; VF8UF2: [[EXIT]]: ; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF8UF2-NEXT: ret i8 [[RES]] @@ -95,16 +91,12 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF16UF1: [[VECTOR_PH]]: ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 ; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer -; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) -; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF16UF1: [[MIDDLE_SPLIT]]: ; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: @@ -123,7 +115,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn ; VF16UF1: [[LOOP_LATCH]]: ; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 ; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], 
!llvm.loop [[LOOP3:![0-9]+]] +; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP0:![0-9]+]] ; VF16UF1: [[EXIT]]: ; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ] ; VF16UF1-NEXT: ret i8 [[RES]] @@ -198,23 +190,19 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[VECTOR_PH]]: ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF8UF2: [[VECTOR_BODY]]: -; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 ; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1 ; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer -; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]]) -; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF8UF2: [[MIDDLE_SPLIT]]: ; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF8UF2: [[MIDDLE_BLOCK]]: ; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VF8UF2: [[VECTOR_EARLY_EXIT]]: ; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true) -; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] +; VF8UF2-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] ; VF8UF2-NEXT: br label %[[EXIT]] ; VF8UF2: [[SCALAR_PH]]: ; 
VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] @@ -228,9 +216,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF8UF2: [[LOOP_LATCH]]: ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; VF8UF2: [[EXIT]]: -; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ] +; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VECTOR_EARLY_EXIT]] ] ; VF8UF2-NEXT: ret i64 [[RES]] ; ; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside( @@ -240,23 +228,19 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1: [[VECTOR_PH]]: ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF16UF1: [[VECTOR_BODY]]: -; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0 ; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0 ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer -; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) -; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] -; VF16UF1-NEXT: br i1 [[TMP6]], label 
%[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]] ; VF16UF1: [[MIDDLE_SPLIT]]: ; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] ; VF16UF1: [[MIDDLE_BLOCK]]: ; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; VF16UF1: [[VECTOR_EARLY_EXIT]]: ; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true) -; VF16UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]] +; VF16UF1-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]] ; VF16UF1-NEXT: br label %[[EXIT]] ; VF16UF1: [[SCALAR_PH]]: ; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] @@ -270,9 +254,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer ; VF16UF1: [[LOOP_LATCH]]: ; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1 ; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16 -; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; VF16UF1: [[EXIT]]: -; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ] +; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VECTOR_EARLY_EXIT]] ] ; VF16UF1-NEXT: ret i64 [[RES]] ; entry: diff --git a/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-disassembly.test b/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-disassembly.test index 730a000a373ff..f86188206b3be 100644 --- a/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-disassembly.test +++ b/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-disassembly.test @@ -1,8 +1,8 @@ // RUN: llvm-objdump -d -m 
--no-show-raw-insn --full-leading-addr --print-imm-hex %p/Inputs/hello.obj.macho-arm | FileCheck %s --check-prefix=OBJ // RUN: llvm-objdump -d -m --no-show-raw-insn --full-leading-addr --print-imm-hex %p/Inputs/hello.exe.macho-arm | FileCheck %s --check-prefix=EXE -OBJ: 00000006 40 f2 24 03 movw r3, :lower16:((54-14)-4) -OBJ: 0000000a c0 f2 00 03 movt r3, :upper16:((54-14)-4) +OBJ: 00000006 40 f2 24 03 movw r3, :lower16:(54-14-4) +OBJ: 0000000a c0 f2 00 03 movt r3, :upper16:(54-14-4) OBJ: 00000024 ff f7 ec ff bl _printf EXE: 0000bfa8 00 f0 28 e8 blx 0xbffc @ symbol stub for: _printf diff --git a/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-subtractor.test b/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-subtractor.test index 65df2a984cd02..bf452c2948a00 100644 --- a/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-subtractor.test +++ b/llvm/test/tools/llvm-objdump/MachO/ARM/symbolized-subtractor.test @@ -11,5 +11,5 @@ PCinst: .section __TEXT,__cstring,cstring_literals Str: .asciz "Hello world\n" -# CHECK: movw r3, :lower16:((Str-PCinst)-4) -# CHECK: movt r3, :upper16:((Str-PCinst)-4) +# CHECK: movw r3, :lower16:(Str-PCinst-4) +# CHECK: movt r3, :upper16:(Str-PCinst-4) diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp index a7777f778f66e..e2f7e104c58e3 100644 --- a/llvm/tools/bugpoint/CrashDebugger.cpp +++ b/llvm/tools/bugpoint/CrashDebugger.cpp @@ -390,9 +390,7 @@ bool ReduceCrashingFunctionAttributes::TestFuncAttrs( // Pass along the set of attributes that caused the crash. Attrs.clear(); - for (Attribute A : NewAttrs.getFnAttrs()) { - Attrs.push_back(A); - } + llvm::append_range(Attrs, NewAttrs.getFnAttrs()); return true; } return false; @@ -800,8 +798,7 @@ bool ReduceCrashingInstructions::TestInsts( // Make sure to use instruction pointers that point into the now-current // module, and that they don't include any deleted blocks. 
Insts.clear(); - for (Instruction *Inst : Instructions) - Insts.push_back(Inst); + llvm::append_range(Insts, Instructions); return true; } // It didn't crash, try something else. @@ -870,8 +867,7 @@ bool ReduceCrashingMetadata::TestInsts(std::vector &Insts) { // Make sure to use instruction pointers that point into the now-current // module, and that they don't include any deleted blocks. Insts.clear(); - for (Instruction *I : Instructions) - Insts.push_back(I); + llvm::append_range(Insts, Instructions); return true; } // It didn't crash, try something else. @@ -1211,8 +1207,7 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) { assert(Fn && "Could not find function?"); std::vector Attrs; - for (Attribute A : Fn->getAttributes().getFnAttrs()) - Attrs.push_back(A); + llvm::append_range(Attrs, Fn->getAttributes().getFnAttrs()); OldSize += Attrs.size(); Expected Result = @@ -1319,8 +1314,7 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) { // contribute to the crash, bisect the operands of the remaining ones std::vector NamedMDOps; for (auto &NamedMD : BD.getProgram().named_metadata()) - for (auto *op : NamedMD.operands()) - NamedMDOps.push_back(op); + llvm::append_range(NamedMDOps, NamedMD.operands()); Expected Result = ReduceCrashingNamedMDOps(BD, TestFn).reduceList(NamedMDOps); if (Error E = Result.takeError()) diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp index 0b29a1f17d879..56a0fa4d5ec9e 100644 --- a/llvm/tools/bugpoint/OptimizerDriver.cpp +++ b/llvm/tools/bugpoint/OptimizerDriver.cpp @@ -203,8 +203,7 @@ bool BugDriver::runPasses(Module &Program, } else Args.push_back(tool); - for (unsigned i = 0, e = OptArgs.size(); i != e; ++i) - Args.push_back(OptArgs[i]); + llvm::append_range(Args, OptArgs); // Pin to legacy PM since bugpoint has lots of infra and hacks revolving // around the legacy PM. 
Args.push_back("-bugpoint-enable-legacy-pm"); diff --git a/llvm/tools/bugpoint/ToolRunner.cpp b/llvm/tools/bugpoint/ToolRunner.cpp index e45c89b746aeb..f2f5966ad9d04 100644 --- a/llvm/tools/bugpoint/ToolRunner.cpp +++ b/llvm/tools/bugpoint/ToolRunner.cpp @@ -181,13 +181,11 @@ Expected LLI::ExecuteProgram(const std::string &Bitcode, } // Add any extra LLI args. - for (unsigned i = 0, e = ToolArgs.size(); i != e; ++i) - LLIArgs.push_back(ToolArgs[i]); + llvm::append_range(LLIArgs, ToolArgs); LLIArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv - for (unsigned i = 0, e = Args.size(); i != e; ++i) - LLIArgs.push_back(Args[i]); + llvm::append_range(LLIArgs, Args); outs() << ""; outs().flush(); @@ -268,13 +266,11 @@ Error CustomCompiler::compileProgram(const std::string &Bitcode, std::vector ProgramArgs; ProgramArgs.push_back(CompilerCommand); - for (const auto &Arg : CompilerArgs) - ProgramArgs.push_back(Arg); + llvm::append_range(ProgramArgs, CompilerArgs); ProgramArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv - for (const auto &Arg : CompilerArgs) - ProgramArgs.push_back(Arg); + llvm::append_range(ProgramArgs, CompilerArgs); if (RunProgramWithTimeout(CompilerCommand, ProgramArgs, "", "", "", Timeout, MemoryLimit)) @@ -317,13 +313,11 @@ Expected CustomExecutor::ExecuteProgram( std::vector ProgramArgs; ProgramArgs.push_back(ExecutionCommand); - for (std::size_t i = 0; i < ExecutorArgs.size(); ++i) - ProgramArgs.push_back(ExecutorArgs[i]); + llvm::append_range(ProgramArgs, ExecutorArgs); ProgramArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv - for (unsigned i = 0, e = Args.size(); i != e; ++i) - ProgramArgs.push_back(Args[i]); + llvm::append_range(ProgramArgs, Args); return RunProgramWithTimeout(ExecutionCommand, ProgramArgs, InputFile, OutputFile, OutputFile, Timeout, MemoryLimit); @@ -447,8 +441,7 @@ Expected LLC::OutputCode(const std::string &Bitcode, 
LLCArgs.push_back(LLCPath); // Add any extra LLC args. - for (unsigned i = 0, e = ToolArgs.size(); i != e; ++i) - LLCArgs.push_back(ToolArgs[i]); + llvm::append_range(LLCArgs, ToolArgs); LLCArgs.push_back("-o"); LLCArgs.push_back(OutputAsmFile); // Output to the Asm file @@ -563,8 +556,7 @@ Expected JIT::ExecuteProgram(const std::string &Bitcode, JITArgs.push_back("-force-interpreter=false"); // Add any extra LLI args. - for (unsigned i = 0, e = ToolArgs.size(); i != e; ++i) - JITArgs.push_back(ToolArgs[i]); + llvm::append_range(JITArgs, ToolArgs); for (unsigned i = 0, e = SharedLibs.size(); i != e; ++i) { JITArgs.push_back("-load"); @@ -572,8 +564,7 @@ Expected JIT::ExecuteProgram(const std::string &Bitcode, } JITArgs.push_back(Bitcode); // Add optional parameters to the running program from Argv - for (unsigned i = 0, e = Args.size(); i != e; ++i) - JITArgs.push_back(Args[i]); + llvm::append_range(JITArgs, Args); outs() << ""; outs().flush(); @@ -674,8 +665,7 @@ Expected CC::ExecuteProgram(const std::string &ProgramFile, // most likely -L and -l options that need to come before other libraries but // after the source. Other options won't be sensitive to placement on the // command line, so this should be safe. - for (unsigned i = 0, e = ArgsForCC.size(); i != e; ++i) - CCArgs.push_back(ArgsForCC[i]); + llvm::append_range(CCArgs, ArgsForCC); CCArgs.push_back("-lm"); // Hard-code the math library... CCArgs.push_back("-O2"); // Optimize the program a bit... @@ -725,8 +715,7 @@ Expected CC::ExecuteProgram(const std::string &ProgramFile, } // Add optional parameters to the running program from Argv - for (unsigned i = 0, e = Args.size(); i != e; ++i) - ProgramArgs.push_back(Args[i]); + llvm::append_range(ProgramArgs, Args); // Now that we have a binary, run it! outs() << ""; @@ -823,8 +812,7 @@ Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType, // most likely -L and -l options that need to come before other libraries but // after the source. 
Other options won't be sensitive to placement on the // command line, so this should be safe. - for (unsigned i = 0, e = ArgsForCC.size(); i != e; ++i) - CCArgs.push_back(ArgsForCC[i]); + llvm::append_range(CCArgs, ArgsForCC); outs() << ""; outs().flush(); diff --git a/llvm/tools/dsymutil/Reproducer.cpp b/llvm/tools/dsymutil/Reproducer.cpp index a6cc10424dc52..31e49cdd0518c 100644 --- a/llvm/tools/dsymutil/Reproducer.cpp +++ b/llvm/tools/dsymutil/Reproducer.cpp @@ -36,8 +36,7 @@ Reproducer::~Reproducer() = default; ReproducerGenerate::ReproducerGenerate(std::error_code &EC, int Argc, char **Argv, bool GenerateOnExit) : Root(createReproducerDir(EC)), GenerateOnExit(GenerateOnExit) { - for (int I = 0; I < Argc; ++I) - Args.push_back(Argv[I]); + llvm::append_range(Args, ArrayRef(Argv, Argc)); if (!Root.empty()) FC = std::make_shared(Root, Root); VFS = FileCollector::createCollectorVFS(vfs::getRealFileSystem(), FC); diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp index 921f283deedc7..a740cdd45b901 100644 --- a/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -588,8 +588,7 @@ void CodeCoverageTool::demangleSymbols(const CoverageMapping &Coverage) { // Invoke the demangler. 
std::vector ArgsV; ArgsV.reserve(ViewOpts.DemanglerOpts.size()); - for (StringRef Arg : ViewOpts.DemanglerOpts) - ArgsV.push_back(Arg); + llvm::append_range(ArgsV, ViewOpts.DemanglerOpts); std::optional Redirects[] = { InputPath.str(), OutputPath.str(), {""}}; std::string ErrMsg; diff --git a/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp b/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp index 2859a36c80b0b..7b85166c1b4ae 100644 --- a/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp +++ b/llvm/tools/llvm-debuginfod/llvm-debuginfod.cpp @@ -126,8 +126,7 @@ int llvm_debuginfod_main(int argc, char **argv, const llvm::ToolContext &) { parseArgs(argc, argv); SmallVector Paths; - for (const std::string &Path : ScanPaths) - Paths.push_back(Path); + llvm::append_range(Paths, ScanPaths); DefaultThreadPool Pool(hardware_concurrency(MaxConcurrency)); DebuginfodLog Log; diff --git a/llvm/tools/llvm-libtool-darwin/DependencyInfo.h b/llvm/tools/llvm-libtool-darwin/DependencyInfo.h index 7b2f94bdbeb81..784ec3f50cd53 100644 --- a/llvm/tools/llvm-libtool-darwin/DependencyInfo.h +++ b/llvm/tools/llvm-libtool-darwin/DependencyInfo.h @@ -50,8 +50,7 @@ class DependencyInfo { // Sort the input by its names. 
std::vector InputNames; InputNames.reserve(Inputs.size()); - for (const auto &F : Inputs) - InputNames.push_back(F); + llvm::append_range(InputNames, Inputs); llvm::sort(InputNames); for (const auto &In : InputNames) diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp index 3c0197e8b7bac..8c588021391b4 100644 --- a/llvm/tools/llvm-lipo/llvm-lipo.cpp +++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp @@ -249,8 +249,8 @@ static Config parseLipoOptions(ArrayRef ArgsArr) { switch (ActionArgs[0]->getOption().getID()) { case LIPO_verify_arch: - for (auto A : InputArgs.getAllArgValues(LIPO_verify_arch)) - C.VerifyArchList.push_back(A); + llvm::append_range(C.VerifyArchList, + InputArgs.getAllArgValues(LIPO_verify_arch)); if (C.VerifyArchList.empty()) reportError( "verify_arch requires at least one architecture to be specified"); diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index 4c9b47d78a1bb..76ff11b8d6412 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -326,8 +326,7 @@ static int run(int argc, char **argv) { Conf.OptLevel = OptLevel - '0'; Conf.Freestanding = EnableFreestanding; - for (auto &PluginFN : PassPlugins) - Conf.PassPlugins.push_back(PluginFN); + llvm::append_range(Conf.PassPlugins, PassPlugins); if (auto Level = CodeGenOpt::parseLevel(CGOptLevel)) { Conf.CGOptLevel = *Level; } else { diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp index 680218e3fc96c..4cb64bdbe8ef9 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -1538,8 +1538,7 @@ int main(int Argc, const char **Argv) { // Initialize the filters for LinePrinter. 
auto propagate = [&](auto &Target, auto &Reference) { - for (std::string &Option : Reference) - Target.push_back(Option); + llvm::append_range(Target, Reference); }; propagate(opts::Filters.ExcludeTypes, opts::pretty::ExcludeTypes); diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index a77188c462afe..6feadc5f259b3 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -266,8 +266,7 @@ void preprocess(StringRef Src, StringRef Dst, const RcOptions &Opts, } } } - for (const auto &S : Opts.PreprocessArgs) - Args.push_back(S); + llvm::append_range(Args, Opts.PreprocessArgs); Args.push_back(Src); Args.push_back("-o"); Args.push_back(Dst); diff --git a/llvm/tools/llvm-reduce/DeltaManager.cpp b/llvm/tools/llvm-reduce/DeltaManager.cpp index 624b5306bc71b..5281b1d5aebf2 100644 --- a/llvm/tools/llvm-reduce/DeltaManager.cpp +++ b/llvm/tools/llvm-reduce/DeltaManager.cpp @@ -12,9 +12,8 @@ //===----------------------------------------------------------------------===// #include "DeltaManager.h" -#include "ReducerWorkItem.h" +#include "DeltaPass.h" #include "TestRunner.h" -#include "deltas/Delta.h" #include "deltas/ReduceAliases.h" #include "deltas/ReduceArguments.h" #include "deltas/ReduceAttributes.h" @@ -71,91 +70,56 @@ static cl::list "default, run all delta passes."), cl::cat(LLVMReduceOptions), cl::CommaSeparated); -#define DELTA_PASSES \ - do { \ - DELTA_PASS("strip-debug-info", stripDebugInfoDeltaPass) \ - DELTA_PASS("functions", reduceFunctionsDeltaPass) \ - DELTA_PASS("function-bodies", reduceFunctionBodiesDeltaPass) \ - DELTA_PASS("special-globals", reduceSpecialGlobalsDeltaPass) \ - DELTA_PASS("aliases", reduceAliasesDeltaPass) \ - DELTA_PASS("ifuncs", reduceIFuncsDeltaPass) \ - DELTA_PASS("simplify-conditionals-true", reduceConditionalsTrueDeltaPass) \ - DELTA_PASS("simplify-conditionals-false", \ - reduceConditionalsFalseDeltaPass) \ - DELTA_PASS("invokes", reduceInvokesDeltaPass) \ - 
DELTA_PASS("unreachable-basic-blocks", \ - reduceUnreachableBasicBlocksDeltaPass) \ - DELTA_PASS("basic-blocks", reduceBasicBlocksDeltaPass) \ - DELTA_PASS("simplify-cfg", reduceUsingSimplifyCFGDeltaPass) \ - DELTA_PASS("function-data", reduceFunctionDataDeltaPass) \ - DELTA_PASS("global-values", reduceGlobalValuesDeltaPass) \ - DELTA_PASS("global-objects", reduceGlobalObjectsDeltaPass) \ - DELTA_PASS("global-initializers", reduceGlobalsInitializersDeltaPass) \ - DELTA_PASS("global-variables", reduceGlobalsDeltaPass) \ - DELTA_PASS("di-metadata", reduceDIMetadataDeltaPass) \ - DELTA_PASS("dbg-records", reduceDbgRecordDeltaPass) \ - DELTA_PASS("distinct-metadata", reduceDistinctMetadataDeltaPass) \ - DELTA_PASS("metadata", reduceMetadataDeltaPass) \ - DELTA_PASS("named-metadata", reduceNamedMetadataDeltaPass) \ - DELTA_PASS("arguments", reduceArgumentsDeltaPass) \ - DELTA_PASS("instructions", reduceInstructionsDeltaPass) \ - DELTA_PASS("simplify-instructions", simplifyInstructionsDeltaPass) \ - DELTA_PASS("ir-passes", runIRPassesDeltaPass) \ - DELTA_PASS("operands-zero", reduceOperandsZeroDeltaPass) \ - DELTA_PASS("operands-one", reduceOperandsOneDeltaPass) \ - DELTA_PASS("operands-nan", reduceOperandsNaNDeltaPass) \ - DELTA_PASS("operands-to-args", reduceOperandsToArgsDeltaPass) \ - DELTA_PASS("operands-skip", reduceOperandsSkipDeltaPass) \ - DELTA_PASS("operand-bundles", reduceOperandBundesDeltaPass) \ - DELTA_PASS("attributes", reduceAttributesDeltaPass) \ - DELTA_PASS("module-data", reduceModuleDataDeltaPass) \ - DELTA_PASS("opcodes", reduceOpcodesDeltaPass) \ - DELTA_PASS("volatile", reduceVolatileInstructionsDeltaPass) \ - DELTA_PASS("atomic-ordering", reduceAtomicOrderingDeltaPass) \ - DELTA_PASS("syncscopes", reduceAtomicSyncScopesDeltaPass) \ - DELTA_PASS("instruction-flags", reduceInstructionFlagsDeltaPass) \ - } while (false) - -#define DELTA_PASSES_MIR \ - do { \ - DELTA_PASS("instructions", reduceInstructionsMIRDeltaPass) \ - 
DELTA_PASS("ir-instruction-references", \ - reduceIRInstructionReferencesDeltaPass) \ - DELTA_PASS("ir-block-references", reduceIRBlockReferencesDeltaPass) \ - DELTA_PASS("ir-function-references", reduceIRFunctionReferencesDeltaPass) \ - DELTA_PASS("instruction-flags", reduceInstructionFlagsMIRDeltaPass) \ - DELTA_PASS("register-uses", reduceRegisterUsesMIRDeltaPass) \ - DELTA_PASS("register-defs", reduceRegisterDefsMIRDeltaPass) \ - DELTA_PASS("register-hints", reduceVirtualRegisterHintsDeltaPass) \ - DELTA_PASS("register-masks", reduceRegisterMasksMIRDeltaPass) \ - } while (false) +// Generate two separate Pass lists: IR_Passes and MIR_Passes +static const DeltaPass IR_Passes[] = { +#undef DELTA_PASS_IR +#undef DELTA_PASS_MIR +#define DELTA_PASS_IR(NAME, FUNC, DESC) {NAME, FUNC, DESC}, +#include "DeltaPasses.def" +#undef DELTA_PASS_IR +}; + +static const DeltaPass MIR_Passes[] = { +#undef DELTA_PASS_IR +#undef DELTA_PASS_MIR +#define DELTA_PASS_MIR(NAME, FUNC, DESC) {NAME, FUNC, DESC}, +#include "DeltaPasses.def" +#undef DELTA_PASS_MIR +}; static void runAllDeltaPasses(TestRunner &Tester, const SmallStringSet &SkipPass) { -#define DELTA_PASS(NAME, FUNC) \ - if (!SkipPass.count(NAME)) { \ - FUNC(Tester); \ - } if (Tester.getProgram().isMIR()) { - DELTA_PASSES_MIR; + for (const DeltaPass &Pass : MIR_Passes) { + if (!SkipPass.count(Pass.Name)) { + runDeltaPass(Tester, Pass); + } + } } else { - DELTA_PASSES; + for (const DeltaPass &Pass : IR_Passes) { + if (!SkipPass.count(Pass.Name)) { + runDeltaPass(Tester, Pass); + } + } } -#undef DELTA_PASS } static void runDeltaPassName(TestRunner &Tester, StringRef PassName) { -#define DELTA_PASS(NAME, FUNC) \ - if (PassName == NAME) { \ - FUNC(Tester); \ - return; \ - } if (Tester.getProgram().isMIR()) { - DELTA_PASSES_MIR; + for (const DeltaPass &Pass : MIR_Passes) { + if (PassName == Pass.Name) { + runDeltaPass(Tester, Pass); + return; + } + } } else { - DELTA_PASSES; + for (const DeltaPass &Pass : IR_Passes) { + if 
(PassName == Pass.Name) { + runDeltaPass(Tester, Pass); + return; + } + } } -#undef DELTA_PASS // We should have errored on unrecognized passes before trying to run // anything. @@ -164,24 +128,25 @@ static void runDeltaPassName(TestRunner &Tester, StringRef PassName) { void llvm::printDeltaPasses(raw_ostream &OS) { OS << "Delta passes (pass to `--delta-passes=` as a comma separated list):\n"; -#define DELTA_PASS(NAME, FUNC) OS << " " << NAME << "\n"; OS << " IR:\n"; - DELTA_PASSES; + for (const DeltaPass &Pass : IR_Passes) { + OS << " " << Pass.Name << '\n'; + } OS << " MIR:\n"; - DELTA_PASSES_MIR; -#undef DELTA_PASS + for (const DeltaPass &Pass : MIR_Passes) { + OS << " " << Pass.Name << '\n'; + } } // Built a set of available delta passes. static void collectPassNames(const TestRunner &Tester, SmallStringSet &NameSet) { -#define DELTA_PASS(NAME, FUNC) NameSet.insert(NAME); - if (Tester.getProgram().isMIR()) { - DELTA_PASSES_MIR; - } else { - DELTA_PASSES; + for (const DeltaPass &Pass : MIR_Passes) { + NameSet.insert(Pass.Name); + } + for (const DeltaPass &Pass : IR_Passes) { + NameSet.insert(Pass.Name); } -#undef DELTA_PASS } /// Verify all requested or skipped passes are valid names, and return them in a diff --git a/llvm/tools/llvm-reduce/DeltaPass.h b/llvm/tools/llvm-reduce/DeltaPass.h new file mode 100644 index 0000000000000..3231570bd23f4 --- /dev/null +++ b/llvm/tools/llvm-reduce/DeltaPass.h @@ -0,0 +1,24 @@ +//===--- DeltaPass.h - Delta Pass Structure --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAPASS_H +#define LLVM_TOOLS_LLVM_REDUCE_DELTAPASS_H + +#include "ReducerWorkItem.h" +#include "deltas/Delta.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { +struct DeltaPass { + StringRef Name; // e.g., "strip-debug-info" + void (*Func)(Oracle &, ReducerWorkItem &); // e.g., stripDebugInfoDeltaPass + StringRef Desc; // e.g., "Stripping Debug Info" +}; +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-reduce/DeltaPasses.def b/llvm/tools/llvm-reduce/DeltaPasses.def new file mode 100644 index 0000000000000..060daf198c76a --- /dev/null +++ b/llvm/tools/llvm-reduce/DeltaPasses.def @@ -0,0 +1,68 @@ +//===--- DeltaPasses.def - Delta Pass Definitions --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +#ifndef DELTA_PASS_IR +#define DELTA_PASS_IR(NAME, FUNC, DESC) +#endif +DELTA_PASS_IR("strip-debug-info", stripDebugInfoDeltaPass, "Stripping Debug Info") +DELTA_PASS_IR("functions", reduceFunctionsDeltaPass, "Reducing Functions") +DELTA_PASS_IR("function-bodies", reduceFunctionBodiesDeltaPass, "Reducing Function Bodies") +DELTA_PASS_IR("special-globals", reduceSpecialGlobalsDeltaPass, "Reducing Special Globals") +DELTA_PASS_IR("aliases", reduceAliasesDeltaPass, "Reducing Aliases") +DELTA_PASS_IR("ifuncs", reduceIFuncsDeltaPass, "Reducing Ifuncs") +DELTA_PASS_IR("simplify-conditionals-true", reduceConditionalsTrueDeltaPass, "Reducing conditional branches to true") +DELTA_PASS_IR("simplify-conditionals-false", + reduceConditionalsFalseDeltaPass, "Reducing conditional branches to false") +DELTA_PASS_IR("invokes", reduceInvokesDeltaPass, "Reducing Invokes") +DELTA_PASS_IR("unreachable-basic-blocks", + reduceUnreachableBasicBlocksDeltaPass, "Removing Unreachable Basic Blocks") +DELTA_PASS_IR("basic-blocks", reduceBasicBlocksDeltaPass, "Reducing Basic Blocks") +DELTA_PASS_IR("simplify-cfg", reduceUsingSimplifyCFGDeltaPass, "Reducing using SimplifyCFG") +DELTA_PASS_IR("function-data", reduceFunctionDataDeltaPass, "Reducing Function Data") +DELTA_PASS_IR("global-values", reduceGlobalValuesDeltaPass, "Reducing GlobalValues") +DELTA_PASS_IR("global-objects", reduceGlobalObjectsDeltaPass, "Reducing GlobalObjects") +DELTA_PASS_IR("global-initializers", reduceGlobalsInitializersDeltaPass, "Reducing GV Initializers") +DELTA_PASS_IR("global-variables", reduceGlobalsDeltaPass, "Reducing GlobalVariables") +DELTA_PASS_IR("di-metadata", reduceDIMetadataDeltaPass, "Reducing DIMetadata") +DELTA_PASS_IR("dbg-records", reduceDbgRecordDeltaPass, "Reducing DbgRecords") +DELTA_PASS_IR("distinct-metadata", reduceDistinctMetadataDeltaPass, 
"Reducing Distinct Metadata") +DELTA_PASS_IR("metadata", reduceMetadataDeltaPass,"Reducing Metadata") +DELTA_PASS_IR("named-metadata", reduceNamedMetadataDeltaPass, "Reducing Named Metadata") +DELTA_PASS_IR("arguments", reduceArgumentsDeltaPass, "Reducing Arguments") +DELTA_PASS_IR("instructions", reduceInstructionsDeltaPass, "Reducing Instructions") +DELTA_PASS_IR("simplify-instructions", simplifyInstructionsDeltaPass, "Simplifying Instructions") +DELTA_PASS_IR("ir-passes", runIRPassesDeltaPass, "Running passes") +DELTA_PASS_IR("operands-zero", reduceOperandsZeroDeltaPass, "Reducing Operands to zero") +DELTA_PASS_IR("operands-one", reduceOperandsOneDeltaPass, "Reducing Operands to one") +DELTA_PASS_IR("operands-nan", reduceOperandsNaNDeltaPass, "Reducing Operands to NaN") +DELTA_PASS_IR("operands-to-args", reduceOperandsToArgsDeltaPass, "Converting operands to function arguments") +DELTA_PASS_IR("operands-skip", reduceOperandsSkipDeltaPass, "Reducing operands by skipping over instructions") +DELTA_PASS_IR("operand-bundles", reduceOperandBundesDeltaPass, "Reducing Operand Bundles") +DELTA_PASS_IR("attributes", reduceAttributesDeltaPass, "Reducing Attributes") +DELTA_PASS_IR("module-data", reduceModuleDataDeltaPass, "Reducing Module Data") +DELTA_PASS_IR("opcodes", reduceOpcodesDeltaPass, "Reducing Opcodes") +DELTA_PASS_IR("volatile", reduceVolatileInstructionsDeltaPass, "Reducing Volatile Instructions") +DELTA_PASS_IR("atomic-ordering", reduceAtomicOrderingDeltaPass, "Reducing Atomic Ordering") +DELTA_PASS_IR("syncscopes", reduceAtomicSyncScopesDeltaPass, "Reducing Atomic Sync Scopes") +DELTA_PASS_IR("instruction-flags", reduceInstructionFlagsDeltaPass, "Reducing Instruction Flags") + + +#ifndef DELTA_PASS_MIR +#define DELTA_PASS_MIR(NAME, FUNC, DESC) +#endif +DELTA_PASS_MIR("instructions", reduceInstructionsMIRDeltaPass, "Reducing Instructions") +DELTA_PASS_MIR("ir-instruction-references", + reduceIRInstructionReferencesDeltaPass, "Reducing IR references from 
instructions") +DELTA_PASS_MIR("ir-block-references", reduceIRBlockReferencesDeltaPass, "Reducing IR references from blocks") +DELTA_PASS_MIR("ir-function-references", reduceIRFunctionReferencesDeltaPass, "Reducing IR references from functions") +DELTA_PASS_MIR("instruction-flags", reduceInstructionFlagsMIRDeltaPass, "Reducing Instruction Flags") +DELTA_PASS_MIR("register-uses", reduceRegisterUsesMIRDeltaPass, "Reducing register uses") +DELTA_PASS_MIR("register-defs", reduceRegisterDefsMIRDeltaPass, "Reducing register defs") +DELTA_PASS_MIR("register-hints", reduceVirtualRegisterHintsDeltaPass, "Reducing virtual register hints from functions") +DELTA_PASS_MIR("register-masks", reduceRegisterMasksMIRDeltaPass, "Reducing register masks") diff --git a/llvm/tools/llvm-reduce/deltas/Delta.cpp b/llvm/tools/llvm-reduce/deltas/Delta.cpp index 6f84b6c09d145..d4106b0243aea 100644 --- a/llvm/tools/llvm-reduce/deltas/Delta.cpp +++ b/llvm/tools/llvm-reduce/deltas/Delta.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "Delta.h" +#include "DeltaPass.h" #include "ReducerWorkItem.h" #include "TestRunner.h" #include "Utils.h" @@ -62,6 +63,10 @@ static cl::opt NumJobs( unsigned NumJobs = 1; #endif +static StringLiteral SeparatorLine = + "--------------------------------------------------------------------------" + "------\n"; + /// Splits Chunks in half and prints them. /// If unable to split (when chunk size is 1) returns false. static bool increaseGranularity(std::vector &Chunks) { @@ -180,11 +185,10 @@ using SharedTaskQueue = std::deque>>; /// reduces the amount of chunks that are considered interesting by the /// given test. The number of chunks is determined by a preliminary run of the /// reduction pass where no change must be made to the module. 
-void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, - StringRef Message) { +void llvm::runDeltaPass(TestRunner &Test, const DeltaPass &Pass) { assert(!Test.getProgram().verify(&errs()) && "input module is broken before making changes"); - errs() << "*** " << Message << "...\n"; + errs() << "*** " << Pass.Desc << " (" << Pass.Name << ")...\n"; int Targets; { @@ -193,7 +197,7 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, // made. std::vector AllChunks = {{0, INT_MAX}}; Oracle Counter(AllChunks); - ExtractChunksFromModule(Counter, Test.getProgram()); + Pass.Func(Counter, Test.getProgram()); Targets = Counter.count(); assert(!Test.getProgram().verify(&errs()) && @@ -215,7 +219,7 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, Oracle NoChunksCounter(NoChunks); std::unique_ptr Clone = Test.getProgram().clone(Test.getTargetMachine()); - ExtractChunksFromModule(NoChunksCounter, *Clone); + Pass.Func(NoChunksCounter, *Clone); assert(Targets == NoChunksCounter.count() && "number of chunks changes when reducing"); #endif @@ -223,7 +227,7 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, if (!Targets) { if (Verbose) errs() << "\nNothing to reduce\n"; - errs() << "----------------------------\n"; + errs() << SeparatorLine; return; } @@ -281,9 +285,8 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, Chunk ChunkToCheck = *(I + J); TaskQueue.emplace_back(ChunkThreadPool.async( ProcessChunkFromSerializedBitcode, ChunkToCheck, std::ref(Test), - ExtractChunksFromModule, UninterestingChunks, - ChunksStillConsideredInteresting, OriginalBC, - std::ref(AnyReduced))); + Pass.Func, UninterestingChunks, ChunksStillConsideredInteresting, + OriginalBC, std::ref(AnyReduced))); } // Start processing results of the queued tasks. 
We wait for the first @@ -305,7 +308,7 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, Chunk ChunkToCheck = *(I + NumScheduledTasks); TaskQueue.emplace_back(ChunkThreadPool.async( ProcessChunkFromSerializedBitcode, ChunkToCheck, - std::ref(Test), ExtractChunksFromModule, UninterestingChunks, + std::ref(Test), Pass.Func, UninterestingChunks, ChunksStillConsideredInteresting, OriginalBC, std::ref(AnyReduced))); } @@ -330,10 +333,9 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, // Forward I to the last chunk processed in parallel. I += NumChunksProcessed - 1; } else { - Result = - CheckChunk(*I, Test.getProgram().clone(Test.getTargetMachine()), - Test, ExtractChunksFromModule, UninterestingChunks, - ChunksStillConsideredInteresting); + Result = CheckChunk( + *I, Test.getProgram().clone(Test.getTargetMachine()), Test, + Pass.Func, UninterestingChunks, ChunksStillConsideredInteresting); } if (!Result) @@ -361,5 +363,5 @@ void llvm::runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, } if (Verbose) errs() << "Couldn't increase anymore.\n"; - errs() << "----------------------------\n"; + errs() << SeparatorLine; } diff --git a/llvm/tools/llvm-reduce/deltas/Delta.h b/llvm/tools/llvm-reduce/deltas/Delta.h index 96fcea89484c0..ec2311f067299 100644 --- a/llvm/tools/llvm-reduce/deltas/Delta.h +++ b/llvm/tools/llvm-reduce/deltas/Delta.h @@ -24,6 +24,7 @@ namespace llvm { class TestRunner; +struct DeltaPass; struct Chunk { int Begin; @@ -134,8 +135,7 @@ using ReductionFunc = function_ref; /// /// Other implementations of the Delta Debugging algorithm can also be found in /// the CReduce, Delta, and Lithium projects. 
-void runDeltaPass(TestRunner &Test, ReductionFunc ExtractChunksFromModule, - StringRef Message); +void runDeltaPass(TestRunner &Test, const DeltaPass &Pass); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAliases.cpp b/llvm/tools/llvm-reduce/deltas/ReduceAliases.cpp index 2f2df549b6728..00d7ce9bd763d 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceAliases.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceAliases.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceAliases.h" -#include "Delta.h" #include "Utils.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" @@ -22,7 +21,7 @@ using namespace llvm; /// Removes all aliases aren't inside any of the /// desired Chunks. -static void extractAliasesFromModule(Oracle &O, ReducerWorkItem &Program) { +void llvm::reduceAliasesDeltaPass(Oracle &O, ReducerWorkItem &Program) { for (auto &GA : make_early_inc_range(Program.getModule().aliases())) { if (!O.shouldKeep()) { GA.replaceAllUsesWith(GA.getAliasee()); @@ -31,7 +30,7 @@ static void extractAliasesFromModule(Oracle &O, ReducerWorkItem &Program) { } } -static void extractIFuncsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceIFuncsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Mod = WorkItem.getModule(); std::vector IFuncs; @@ -43,11 +42,3 @@ static void extractIFuncsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { if (!IFuncs.empty()) lowerGlobalIFuncUsersAsGlobalCtor(Mod, IFuncs); } - -void llvm::reduceAliasesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractAliasesFromModule, "Reducing Aliases"); -} - -void llvm::reduceIFuncsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractIFuncsFromModule, "Reducing Ifuncs"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAliases.h b/llvm/tools/llvm-reduce/deltas/ReduceAliases.h index 404677d221ca4..ce0b4443d080f 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceAliases.h +++ 
b/llvm/tools/llvm-reduce/deltas/ReduceAliases.h @@ -17,8 +17,8 @@ #include "Delta.h" namespace llvm { -void reduceAliasesDeltaPass(TestRunner &Test); -void reduceIFuncsDeltaPass(TestRunner &Test); +void reduceAliasesDeltaPass(Oracle &O, ReducerWorkItem &Program); +void reduceIFuncsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp b/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp index b16f512ff6166..fdac995af32f6 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceArguments.h" -#include "Delta.h" #include "Utils.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Constants.h" @@ -20,6 +19,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include #include @@ -112,7 +113,7 @@ static bool allFuncUsersRewritable(const Function &F) { /// Removes out-of-chunk arguments from functions, and modifies their calls /// accordingly. It also removes allocations of out-of-chunk arguments. 
-static void extractArgumentsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceArgumentsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); std::vector InitArgsToKeep; std::vector Funcs; @@ -177,7 +178,3 @@ static void extractArgumentsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { F->eraseFromParent(); } } - -void llvm::reduceArgumentsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractArgumentsFromModule, "Reducing Arguments"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceArguments.h b/llvm/tools/llvm-reduce/deltas/ReduceArguments.h index 5adcfe89266bc..ceb8d79bc13fd 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceArguments.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceArguments.h @@ -15,12 +15,9 @@ #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEARGUMENTS_H #include "Delta.h" -#include "llvm/IR/Argument.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" namespace llvm { -void reduceArgumentsDeltaPass(TestRunner &Test); +void reduceArgumentsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp index 1f497089e18fc..63d7abe61bda7 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceAttributes.h" -#include "Delta.h" #include "TestRunner.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -166,11 +165,7 @@ class AttributeRemapper : public InstVisitor { } // namespace /// Removes out-of-chunk attributes from module. 
-static void extractAttributesFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceAttributesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { AttributeRemapper R(O, WorkItem.getModule()); R.visit(WorkItem.getModule()); } - -void llvm::reduceAttributesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractAttributesFromModule, "Reducing Attributes"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h index a2e9955ac5bb4..663f6d8c23a3b 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceAttributes.h @@ -14,9 +14,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEATTRIBUTES_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEATTRIBUTES_H +#include "Delta.h" + namespace llvm { -class TestRunner; -void reduceAttributesDeltaPass(TestRunner &Test); +void reduceAttributesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp index da363df77d0c0..5656fdda764a4 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp @@ -23,8 +23,8 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" - #include #define DEBUG_TYPE "llvm-reduce" @@ -141,7 +141,7 @@ removeUninterestingBBsFromSwitch(SwitchInst &SwInst, /// Removes out-of-chunk arguments from functions, and modifies their calls /// accordingly. It also removes allocations of out-of-chunk arguments. 
-static void extractBasicBlocksFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceBasicBlocksDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { DenseSet BBsToDelete; df_iterator_default_set Reachable; @@ -188,12 +188,8 @@ static void extractBasicBlocksFromModule(Oracle &O, ReducerWorkItem &WorkItem) { } } -void llvm::reduceBasicBlocksDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractBasicBlocksFromModule, "Reducing Basic Blocks"); -} - -static void removeUnreachableBasicBlocksFromModule(Oracle &O, - ReducerWorkItem &WorkItem) { +void llvm::reduceUnreachableBasicBlocksDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { std::vector DeadBlocks; df_iterator_default_set Reachable; @@ -219,8 +215,3 @@ static void removeUnreachableBasicBlocksFromModule(Oracle &O, Reachable.clear(); } } - -void llvm::reduceUnreachableBasicBlocksDeltaPass(TestRunner &Test) { - runDeltaPass(Test, removeUnreachableBasicBlocksFromModule, - "Removing Unreachable Basic Blocks"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.h b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.h index a090d675ef822..b7a3b2867ae35 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.h @@ -14,12 +14,11 @@ #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEBASICBLOCKS_H #include "Delta.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" namespace llvm { -void reduceBasicBlocksDeltaPass(TestRunner &Test); -void reduceUnreachableBasicBlocksDeltaPass(TestRunner &Test); +void reduceBasicBlocksDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); +void reduceUnreachableBasicBlocksDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp index 9dcf722fd1d90..8d1d73785f567 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp +++ 
b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceDIMetadata.h" -#include "Delta.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -76,7 +75,7 @@ void identifyUninterestingMDNodes(Oracle &O, MDNodeList &MDs) { } } -static void extractDIMetadataFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceDIMetadataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); MDNodeList MDs; @@ -94,7 +93,3 @@ static void extractDIMetadataFromModule(Oracle &O, ReducerWorkItem &WorkItem) { } identifyUninterestingMDNodes(O, MDs); } - -void llvm::reduceDIMetadataDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractDIMetadataFromModule, "Reducing DIMetadata"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.h b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.h index 379c14a0db200..d9976fc3a2902 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceDIMetadata.h @@ -14,10 +14,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEDIMETADATA_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEDIMETADATA_H -#include "TestRunner.h" +#include "Delta.h" namespace llvm { -void reduceDIMetadataDeltaPass(TestRunner &Test); +void reduceDIMetadataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp index 25de659109c9f..4de942d459b69 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.cpp @@ -20,10 +20,12 @@ #include "ReduceDbgRecords.h" #include "Utils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugProgramInstruction.h" using namespace llvm; -static void 
extractDbgRecordsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceDbgRecordDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &M = WorkItem.getModule(); for (auto &F : M) @@ -33,7 +35,3 @@ static void extractDbgRecordsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { if (!O.shouldKeep()) DR.eraseFromParent(); } - -void llvm::reduceDbgRecordDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractDbgRecordsFromModule, "Reducing DbgRecords"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.h b/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.h index 07a1e04fceaee..a122465e2a628 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceDbgRecords.h @@ -15,11 +15,9 @@ #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEDBGRECORDS_H #include "Delta.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DebugProgramInstruction.h" namespace llvm { -void reduceDbgRecordDeltaPass(TestRunner &Test); +void reduceDbgRecordDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp index 0f46409977a33..4b3c5f58fe549 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceDistinctMetadata.h" -#include "Delta.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -118,8 +117,8 @@ static void cleanUpTemporaries(NamedMDNode &NamedNode, MDTuple *TemporaryTuple, } } -static void extractDistinctMetadataFromModule(Oracle &O, - ReducerWorkItem &WorkItem) { +void llvm::reduceDistinctMetadataDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); MDTuple *TemporaryTuple = 
MDTuple::getDistinct(Program.getContext(), SmallVector{}); @@ -135,8 +134,3 @@ static void extractDistinctMetadataFromModule(Oracle &O, for (NamedMDNode &NamedNode : Program.named_metadata()) cleanUpTemporaries(NamedNode, TemporaryTuple, Program); } - -void llvm::reduceDistinctMetadataDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractDistinctMetadataFromModule, - "Reducing Distinct Metadata"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.h b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.h index d02e8e6107b75..e7a817c173b07 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceDistinctMetadata.h @@ -14,10 +14,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEDISTINCTMETADATA_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEDISTINCTMETADATA_H -#include "TestRunner.h" +#include "Delta.h" namespace llvm { -void reduceDistinctMetadataDeltaPass(TestRunner &Test); +void reduceDistinctMetadataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.cpp b/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.cpp index 21875ba00cf8b..af0ff996a1c13 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceFunctionBodies.h" -#include "Delta.h" #include "Utils.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -21,8 +20,7 @@ using namespace llvm; /// Removes all the bodies of defined functions that aren't inside any of the /// desired Chunks. 
-static void extractFunctionBodiesFromModule(Oracle &O, - ReducerWorkItem &WorkItem) { +void llvm::reduceFunctionBodiesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { // Delete out-of-chunk function bodies for (auto &F : WorkItem.getModule()) { if (!F.isDeclaration() && !hasAliasUse(F) && !O.shouldKeep()) { @@ -32,12 +30,7 @@ static void extractFunctionBodiesFromModule(Oracle &O, } } -void llvm::reduceFunctionBodiesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractFunctionBodiesFromModule, - "Reducing Function Bodies"); -} - -static void reduceFunctionData(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceFunctionDataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { for (Function &F : WorkItem.getModule()) { if (F.hasPersonalityFn()) { if (none_of(F, @@ -56,7 +49,3 @@ static void reduceFunctionData(Oracle &O, ReducerWorkItem &WorkItem) { F.setPrologueData(nullptr); } } - -void llvm::reduceFunctionDataDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceFunctionData, "Reducing Function Data"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.h b/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.h index ae738fb1b88e3..720fb6eb26654 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.h @@ -17,8 +17,8 @@ #include "Delta.h" namespace llvm { -void reduceFunctionBodiesDeltaPass(TestRunner &Test); -void reduceFunctionDataDeltaPass(TestRunner &Test); +void reduceFunctionBodiesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); +void reduceFunctionDataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp index 619811c89202e..44f1e52204f2f 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp @@ -13,9 +13,9 @@ 
//===----------------------------------------------------------------------===// #include "ReduceFunctions.h" -#include "Delta.h" #include "Utils.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include @@ -23,7 +23,7 @@ using namespace llvm; /// Removes all the Defined Functions /// that aren't inside any of the desired Chunks. -static void extractFunctionsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceFunctionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); // Record all out-of-chunk functions. @@ -54,7 +54,3 @@ static void extractFunctionsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { cast(F)->eraseFromParent(); } } - -void llvm::reduceFunctionsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractFunctionsFromModule, "Reducing Functions"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.h b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.h index d3ff0d9511289..6f4e61c8c1e86 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.h @@ -15,10 +15,9 @@ #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEFUNCTIONS_H #include "Delta.h" -#include "llvm/Transforms/Utils/Cloning.h" namespace llvm { -void reduceFunctionsDeltaPass(TestRunner &Test); +void reduceFunctionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp index 1d1463a055bd8..64bf711f23d52 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp @@ -19,7 +19,7 @@ static bool shouldReduceAlign(GlobalObject &GO) { static bool shouldReduceComdat(GlobalObject &GO) { return GO.hasComdat(); } -static void reduceGOs(Oracle &O, ReducerWorkItem &Program) { +void 
llvm::reduceGlobalObjectsDeltaPass(Oracle &O, ReducerWorkItem &Program) { for (auto &GO : Program.getModule().global_objects()) { if (shouldReduceSection(GO) && !O.shouldKeep()) GO.setSection(""); @@ -29,7 +29,3 @@ static void reduceGOs(Oracle &O, ReducerWorkItem &Program) { GO.setComdat(nullptr); } } - -void llvm::reduceGlobalObjectsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceGOs, "Reducing GlobalObjects"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h index 35c38a9ecf212..bca061e3b02cb 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void reduceGlobalObjectsDeltaPass(TestRunner &Test); +void reduceGlobalObjectsDeltaPass(Oracle &O, ReducerWorkItem &Program); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp b/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp index 6e8c21008502f..577e0f5d16b63 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp @@ -41,7 +41,7 @@ static bool shouldReduceLinkage(GlobalValue &GV) { return !GV.hasExternalLinkage() && !GV.hasAppendingLinkage(); } -static void reduceGVs(Oracle &O, ReducerWorkItem &Program) { +void llvm::reduceGlobalValuesDeltaPass(Oracle &O, ReducerWorkItem &Program) { for (auto &GV : Program.getModule().global_values()) { if (shouldReduceDSOLocal(GV) && !O.shouldKeep()) GV.setDSOLocal(false); @@ -66,7 +66,3 @@ static void reduceGVs(Oracle &O, ReducerWorkItem &Program) { } } } - -void llvm::reduceGlobalValuesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceGVs, "Reducing GlobalValues"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.h b/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.h index 19c0707936528..f7dbc90543156 100644 --- 
a/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.h @@ -17,7 +17,7 @@ #include "Delta.h" namespace llvm { -void reduceGlobalValuesDeltaPass(TestRunner &Test); +void reduceGlobalValuesDeltaPass(Oracle &O, ReducerWorkItem &Program); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.cpp b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.cpp index 4c7125217f252..e285e6f7ba67f 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.cpp @@ -14,11 +14,14 @@ #include "ReduceGlobalVarInitializers.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Value.h" +#include "llvm/Transforms/Utils/Cloning.h" using namespace llvm; /// Removes all the Initialized GVs that aren't inside the desired Chunks. -static void extractGVsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceGlobalsInitializersDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { // Drop initializers of out-of-chunk GVs for (auto &GV : WorkItem.getModule().globals()) if (GV.hasInitializer() && !O.shouldKeep()) { @@ -27,7 +30,3 @@ static void extractGVsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { GV.setComdat(nullptr); } } - -void llvm::reduceGlobalsInitializersDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractGVsFromModule, "Reducing GV Initializers"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.h b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.h index 318b29b6ca5e4..b3cb075346897 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.h @@ -15,11 +15,9 @@ #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEGLOBALVARINITIALIZERS_H #include "Delta.h" -#include "llvm/IR/Value.h" -#include "llvm/Transforms/Utils/Cloning.h" namespace llvm { -void 
reduceGlobalsInitializersDeltaPass(TestRunner &Test); +void reduceGlobalsInitializersDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp index b448081ee1a27..ff5f643935b46 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp @@ -14,6 +14,8 @@ #include "ReduceGlobalVars.h" #include "Utils.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Value.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -23,7 +25,7 @@ static bool shouldAlwaysKeep(const GlobalVariable &GV) { } /// Removes all the GVs that aren't inside the desired Chunks. -static void extractGVsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceGlobalsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); // Get GVs inside desired chunks @@ -53,7 +55,3 @@ static void extractGVsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { cast(GV)->eraseFromParent(); } } - -void llvm::reduceGlobalsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractGVsFromModule, "Reducing GlobalVariables"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.h b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.h index 1198dceb45368..eb1a65f2043a9 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.h @@ -15,11 +15,9 @@ #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEGLOBALVARS_H #include "Delta.h" -#include "llvm/IR/Value.h" -#include "llvm/Transforms/Utils/Cloning.h" namespace llvm { -void reduceGlobalsDeltaPass(TestRunner &Test); +void reduceGlobalsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.cpp 
b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.cpp index 4bb1eb7db1d09..231883fc1f4f3 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "ReduceIRReferences.h" -#include "Delta.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -37,14 +36,16 @@ static void dropIRReferencesFromInstructions(Oracle &O, MachineFunction &MF) { } } -static void stripIRFromInstructions(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceIRInstructionReferencesDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) dropIRReferencesFromInstructions(O, *MF); } } -static void stripIRFromBlocks(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceIRBlockReferencesDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) { for (MachineBasicBlock &MBB : *MF) { @@ -55,7 +56,8 @@ static void stripIRFromBlocks(Oracle &O, ReducerWorkItem &WorkItem) { } } -static void stripIRFromFunctions(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceIRFunctionReferencesDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (!O.shouldKeep()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) { @@ -67,17 +69,3 @@ static void stripIRFromFunctions(Oracle &O, ReducerWorkItem &WorkItem) { } } } - -void llvm::reduceIRInstructionReferencesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, stripIRFromInstructions, - "Reducing IR references from instructions"); -} - -void llvm::reduceIRBlockReferencesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, stripIRFromBlocks, "Reducing IR references from 
blocks"); -} - -void llvm::reduceIRFunctionReferencesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, stripIRFromFunctions, - "Reducing IR references from functions"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.h b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.h index 548559a0775b4..4394602911df1 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceIRReferences.h @@ -14,17 +14,19 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEIRREFERENCES_MIR_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEIRREFERENCES_MIR_H +#include "Delta.h" + namespace llvm { -class TestRunner; /// Remove IR references from instructions (i.e. from memory operands) -void reduceIRInstructionReferencesDeltaPass(TestRunner &Test); +void reduceIRInstructionReferencesDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem); /// Remove IR BasicBlock references (the block names) -void reduceIRBlockReferencesDeltaPass(TestRunner &Test); +void reduceIRBlockReferencesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); /// Remove IR references from function level fields (e.g. 
frame object names) -void reduceIRFunctionReferencesDeltaPass(TestRunner &Test); +void reduceIRFunctionReferencesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp index e157747004782..2937550bfec75 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "ReduceInstructionFlags.h" -#include "Delta.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -19,7 +18,8 @@ using namespace llvm; -static void reduceFlagsInModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceInstructionFlagsDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { // Keep this in sync with computeIRComplexityScoreImpl(). for (Function &F : WorkItem.getModule()) { for (Instruction &I : instructions(F)) { @@ -83,7 +83,3 @@ static void reduceFlagsInModule(Oracle &O, ReducerWorkItem &WorkItem) { } } } - -void llvm::reduceInstructionFlagsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceFlagsInModule, "Reducing Instruction Flags"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.h b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.h index 1764c0199da87..005cc8390ab96 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.h @@ -9,10 +9,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINSTRUCTIONFLAGS_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINSTRUCTIONFLAGS_H -#include "TestRunner.h" +#include "Delta.h" namespace llvm { -void reduceInstructionFlagsDeltaPass(TestRunner &Test); +void reduceInstructionFlagsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git 
a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.cpp index f2895b31947ec..70dbd85a8da93 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.cpp @@ -14,9 +14,11 @@ #include "ReduceInstructionFlagsMIR.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" + using namespace llvm; -static void removeFlagsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceInstructionFlagsMIRDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) { for (MachineBasicBlock &MBB : *MF) { @@ -29,7 +31,3 @@ static void removeFlagsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { } } } - -void llvm::reduceInstructionFlagsMIRDeltaPass(TestRunner &Test) { - runDeltaPass(Test, removeFlagsFromModule, "Reducing Instruction Flags"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.h b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.h index a5a34d275e2f0..77d8eea12a2d6 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlagsMIR.h @@ -17,7 +17,7 @@ #include "Delta.h" namespace llvm { -void reduceInstructionFlagsMIRDeltaPass(TestRunner &Test); +void reduceInstructionFlagsMIRDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp index e1b7924594b5e..a906584f75dd7 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp @@ -14,6 +14,8 @@ #include "ReduceInstructions.h" #include "Utils.h" #include "llvm/IR/Constants.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include 
"llvm/Transforms/Utils/Cloning.h" #include using namespace llvm; @@ -29,7 +31,7 @@ static bool shouldAlwaysKeep(const Instruction &I) { /// Removes out-of-chunk arguments from functions, and modifies their calls /// accordingly. It also removes allocations of out-of-chunk arguments. -static void extractInstrFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceInstructionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); for (auto &F : Program) { @@ -46,7 +48,3 @@ static void extractInstrFromModule(Oracle &O, ReducerWorkItem &WorkItem) { } } } - -void llvm::reduceInstructionsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractInstrFromModule, "Reducing Instructions"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.h b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.h index 8c13a02cb98f3..ca3b7d521ce77 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructions.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructions.h @@ -15,11 +15,9 @@ #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINSTRUCTIONS_H #include "Delta.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" namespace llvm { -void reduceInstructionsDeltaPass(TestRunner &Test); +void reduceInstructionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp index 40bc6b180fb88..24975e9f7aaa7 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceInstructionsMIR.h" -#include "Delta.h" - #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -151,13 +149,10 @@ static void 
extractInstrFromFunction(Oracle &O, MachineFunction &MF) { MI->eraseFromParent(); } -static void extractInstrFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceInstructionsMIRDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (MachineFunction *MF = WorkItem.MMI->getMachineFunction(F)) extractInstrFromFunction(O, *MF); } } - -void llvm::reduceInstructionsMIRDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractInstrFromModule, "Reducing Instructions"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h index 70e0ac5fcf37f..e07f5636b6c8b 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h @@ -14,10 +14,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINSTRUCTIONS_MIR_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINSTRUCTIONS_MIR_H -namespace llvm { -class TestRunner; +#include "Delta.h" -void reduceInstructionsMIRDeltaPass(TestRunner &Test); +namespace llvm { +void reduceInstructionsMIRDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInvokes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInvokes.cpp index c6425a753df54..ca0fb7156673a 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInvokes.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceInvokes.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "ReduceInvokes.h" -#include "Delta.h" #include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/Local.h" @@ -29,13 +28,9 @@ static void reduceInvokesInFunction(Oracle &O, Function &F) { // reduction. 
} -static void reduceInvokesInModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceInvokesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { for (Function &F : WorkItem.getModule()) { if (F.hasPersonalityFn()) reduceInvokesInFunction(O, F); } } - -void llvm::reduceInvokesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceInvokesInModule, "Reducing Invokes"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInvokes.h b/llvm/tools/llvm-reduce/deltas/ReduceInvokes.h index 9607add166005..4d246c22e6220 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInvokes.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceInvokes.h @@ -9,10 +9,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINVOKES_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINVOKES_H -#include "TestRunner.h" +#include "Delta.h" namespace llvm { -void reduceInvokesDeltaPass(TestRunner &Test); +void reduceInvokesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.cpp b/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.cpp index 8e73ea076034c..4584694550936 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "ReduceMemoryOperations.h" -#include "Delta.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -36,15 +35,12 @@ static void removeVolatileInFunction(Oracle &O, Function &F) { } } -static void removeVolatileInModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceVolatileInstructionsDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (Function &F : WorkItem.getModule()) removeVolatileInFunction(O, F); } -void llvm::reduceVolatileInstructionsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, removeVolatileInModule, "Reducing Volatile Instructions"); -} - 
static void reduceAtomicSyncScopesInFunction(Oracle &O, Function &F) { for (Instruction &I : instructions(F)) { if (LoadInst *LI = dyn_cast(&I)) { @@ -66,17 +62,12 @@ static void reduceAtomicSyncScopesInFunction(Oracle &O, Function &F) { } } -static void reduceAtomicSyncScopesInModule(Oracle &O, +void llvm::reduceAtomicSyncScopesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { for (Function &F : WorkItem.getModule()) reduceAtomicSyncScopesInFunction(O, F); } -void llvm::reduceAtomicSyncScopesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceAtomicSyncScopesInModule, - "Reducing Atomic Sync Scopes"); -} - // TODO: Might be helpful to incrementally relax orders static void reduceAtomicOrderingInFunction(Oracle &O, Function &F) { for (Instruction &I : instructions(F)) { @@ -100,11 +91,7 @@ static void reduceAtomicOrderingInFunction(Oracle &O, Function &F) { } } -static void reduceAtomicOrderingInModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceAtomicOrderingDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { for (Function &F : WorkItem.getModule()) reduceAtomicOrderingInFunction(O, F); } - -void llvm::reduceAtomicOrderingDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceAtomicOrderingInModule, "Reducing Atomic Ordering"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.h b/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.h index ca6a770dff081..46ada3661e31d 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceMemoryOperations.h @@ -9,12 +9,12 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEMEMORYOPERATIONS_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEMEMORYOPERATIONS_H -#include "TestRunner.h" +#include "Delta.h" namespace llvm { -void reduceVolatileInstructionsDeltaPass(TestRunner &Test); -void reduceAtomicSyncScopesDeltaPass(TestRunner &Test); -void reduceAtomicOrderingDeltaPass(TestRunner &Test); +void reduceVolatileInstructionsDeltaPass(Oracle &O, 
ReducerWorkItem &WorkItem); +void reduceAtomicSyncScopesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); +void reduceAtomicOrderingDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp b/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp index 316c74876025a..c0d0163dab5fb 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceMetadata.h" -#include "Delta.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/InstIterator.h" @@ -48,7 +47,7 @@ static constexpr StringLiteral ListNamedMetadata[] = { }; /// Remove unneeded arguments to named metadata. -static void reduceNamedMetadataOperands(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceNamedMetadataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &M = WorkItem.getModule(); for (NamedMDNode &I : M.named_metadata()) { @@ -77,7 +76,7 @@ static void reduceNamedMetadataOperands(Oracle &O, ReducerWorkItem &WorkItem) { /// Removes all the Named and Unnamed Metadata Nodes, as well as any debug /// functions that aren't inside the desired Chunks. 
-static void extractMetadataFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceMetadataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); // Get out-of-chunk Named metadata nodes @@ -122,11 +121,3 @@ static void extractMetadataFromModule(Oracle &O, ReducerWorkItem &WorkItem) { } } } - -void llvm::reduceMetadataDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractMetadataFromModule, "Reducing Metadata"); -} - -void llvm::reduceNamedMetadataDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceNamedMetadataOperands, "Reducing Named Metadata"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceMetadata.h b/llvm/tools/llvm-reduce/deltas/ReduceMetadata.h index f3af31a2759bc..34bf2feb92f74 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceMetadata.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceMetadata.h @@ -14,11 +14,11 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEMETADATA_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEMETADATA_H -#include "TestRunner.h" +#include "Delta.h" namespace llvm { -void reduceMetadataDeltaPass(TestRunner &Test); -void reduceNamedMetadataDeltaPass(TestRunner &Test); +void reduceMetadataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); +void reduceNamedMetadataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceModuleData.cpp b/llvm/tools/llvm-reduce/deltas/ReduceModuleData.cpp index 17930abe6dbfe..4aeaef6d8d676 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceModuleData.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceModuleData.cpp @@ -14,7 +14,7 @@ using namespace llvm; -static void clearModuleData(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceModuleDataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); if (!Program.getModuleIdentifier().empty() && !O.shouldKeep()) @@ -25,7 +25,3 @@ static void clearModuleData(Oracle &O, ReducerWorkItem &WorkItem) { if 
(!Program.getModuleInlineAsm().empty() && !O.shouldKeep()) Program.setModuleInlineAsm(""); } - -void llvm::reduceModuleDataDeltaPass(TestRunner &Test) { - runDeltaPass(Test, clearModuleData, "Reducing Module Data"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceModuleData.h b/llvm/tools/llvm-reduce/deltas/ReduceModuleData.h index 960fe8c6d3a6d..a5eaab9f9c59c 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceModuleData.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceModuleData.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void reduceModuleDataDeltaPass(TestRunner &Test); +void reduceModuleDataDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp index 9fb4fd61c74e7..ceea71f68c932 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceOpcodes.h" -#include "Delta.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -240,7 +239,7 @@ static Value *reduceInstruction(Oracle &O, Module &M, Instruction &I) { return nullptr; } -static void replaceOpcodesInModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceOpcodesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Mod = WorkItem.getModule(); for (Function &F : Mod) { @@ -261,7 +260,3 @@ static void replaceOpcodesInModule(Oracle &O, ReducerWorkItem &WorkItem) { } } } - -void llvm::reduceOpcodesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, replaceOpcodesInModule, "Reducing Opcodes"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h index 79edc7f32facf..5861c2571a1bd 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.h @@ 
-9,10 +9,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPCODES_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPCODES_H -#include "TestRunner.h" +#include "Delta.h" namespace llvm { -void reduceOpcodesDeltaPass(TestRunner &Test); +void reduceOpcodesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp index d2274877f126b..e5d7b187c8107 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceOperandBundles.h" -#include "Delta.h" #include "TestRunner.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -95,8 +94,7 @@ static void maybeRewriteCallWithDifferentBundles( } /// Removes out-of-chunk operand bundles from calls. -static void extractOperandBundesFromModule(Oracle &O, - ReducerWorkItem &WorkItem) { +void llvm::reduceOperandBundesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); OperandBundleRemapper R(O); R.visit(Program); @@ -104,8 +102,3 @@ static void extractOperandBundesFromModule(Oracle &O, for (const auto &I : R.CallsToRefine) maybeRewriteCallWithDifferentBundles(I.first, I.second); } - -void llvm::reduceOperandBundesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractOperandBundesFromModule, - "Reducing Operand Bundles"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.h b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.h index 390b029242536..23af510f7f31c 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.h @@ -14,9 +14,10 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPERANDBUNDLES_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPERANDBUNDLES_H +#include "Delta.h" + 
namespace llvm { -class TestRunner; -void reduceOperandBundesDeltaPass(TestRunner &Test); +void reduceOperandBundesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp index 5babdc7d0a940..c135f0c9e5c36 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp @@ -83,7 +83,7 @@ static bool switchCaseExists(Use &Op, ConstantInt *CI) { return SI->findCaseValue(CI) != SI->case_default(); } -void llvm::reduceOperandsOneDeltaPass(TestRunner &Test) { +void llvm::reduceOperandsOneDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { auto ReduceValue = [](Use &Op) -> Value * { if (!shouldReduceOperand(Op)) return nullptr; @@ -118,15 +118,10 @@ void llvm::reduceOperandsOneDeltaPass(TestRunner &Test) { return nullptr; }; - runDeltaPass( - Test, - [ReduceValue](Oracle &O, ReducerWorkItem &WorkItem) { - extractOperandsFromModule(O, WorkItem, ReduceValue); - }, - "Reducing Operands to one"); + extractOperandsFromModule(O, WorkItem, ReduceValue); } -void llvm::reduceOperandsZeroDeltaPass(TestRunner &Test) { +void llvm::reduceOperandsZeroDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { auto ReduceValue = [](Use &Op) -> Value * { if (!shouldReduceOperand(Op)) return nullptr; @@ -148,15 +143,10 @@ void llvm::reduceOperandsZeroDeltaPass(TestRunner &Test) { // Don't replace existing zeroes. return isZero(Op) ? 
nullptr : Constant::getNullValue(Op->getType()); }; - runDeltaPass( - Test, - [ReduceValue](Oracle &O, ReducerWorkItem &Program) { - extractOperandsFromModule(O, Program, ReduceValue); - }, - "Reducing Operands to zero"); + extractOperandsFromModule(O, WorkItem, ReduceValue); } -void llvm::reduceOperandsNaNDeltaPass(TestRunner &Test) { +void llvm::reduceOperandsNaNDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { auto ReduceValue = [](Use &Op) -> Value * { Type *Ty = Op->getType(); if (!Ty->isFPOrFPVectorTy()) @@ -176,10 +166,5 @@ void llvm::reduceOperandsNaNDeltaPass(TestRunner &Test) { return ConstantFP::getQNaN(Ty); }; - runDeltaPass( - Test, - [ReduceValue](Oracle &O, ReducerWorkItem &Program) { - extractOperandsFromModule(O, Program, ReduceValue); - }, - "Reducing Operands to NaN"); + extractOperandsFromModule(O, WorkItem, ReduceValue); } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperands.h b/llvm/tools/llvm-reduce/deltas/ReduceOperands.h index b4a18998f8e20..2c86ba920442b 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperands.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperands.h @@ -12,9 +12,9 @@ #include "Delta.h" namespace llvm { -void reduceOperandsOneDeltaPass(TestRunner &Test); -void reduceOperandsZeroDeltaPass(TestRunner &Test); -void reduceOperandsNaNDeltaPass(TestRunner &Test); +void reduceOperandsOneDeltaPass(Oracle &, ReducerWorkItem &); +void reduceOperandsZeroDeltaPass(Oracle &, ReducerWorkItem &); +void reduceOperandsNaNDeltaPass(Oracle &, ReducerWorkItem &); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.cpp index 2a9d40d8c3c59..2eff3da263d31 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.cpp @@ -194,7 +194,7 @@ opportunities(Function &F, } } -static void extractOperandsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceOperandsSkipDeltaPass(Oracle 
&O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); for (Function &F : Program.functions()) { @@ -229,8 +229,3 @@ static void extractOperandsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { } } } - -void llvm::reduceOperandsSkipDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractOperandsFromModule, - "Reducing operands by skipping over instructions"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.h b/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.h index 79897011639a2..71047110701fa 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void reduceOperandsSkipDeltaPass(TestRunner &Test); +void reduceOperandsSkipDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif /* LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPERANDSSKIP_H */ diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp index 39302bd09fb49..0d984622bc298 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "ReduceOperandsToArgs.h" -#include "Delta.h" #include "Utils.h" #include "llvm/ADT/Sequence.h" #include "llvm/IR/Constants.h" @@ -196,7 +195,7 @@ static void substituteOperandWithArgument(Function *OldF, OldF->eraseFromParent(); } -static void reduceOperandsToArgs(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceOperandsToArgsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); SmallVector OperandsToReduce; @@ -218,8 +217,3 @@ static void reduceOperandsToArgs(Oracle &O, ReducerWorkItem &WorkItem) { substituteOperandWithArgument(&F, OperandsToReduce); } } - -void llvm::reduceOperandsToArgsDeltaPass(TestRunner &Test) { - 
runDeltaPass(Test, reduceOperandsToArgs, - "Converting operands to function arguments"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.h b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.h index 23043dd60b6ff..5d6e47c56059b 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void reduceOperandsToArgsDeltaPass(TestRunner &Test); +void reduceOperandsToArgsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif /* LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEOPERANDSTOARGS_H */ diff --git a/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.cpp b/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.cpp index 97259649ab858..e9d2e9a7b545f 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.cpp @@ -110,13 +110,10 @@ static void removeDefsFromFunction(Oracle &O, MachineFunction &MF) { } } -static void removeDefsFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceRegisterDefsMIRDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) removeDefsFromFunction(O, *MF); } } - -void llvm::reduceRegisterDefsMIRDeltaPass(TestRunner &Test) { - runDeltaPass(Test, removeDefsFromModule, "Reducing register defs"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.h b/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.h index 031d24125bac8..88ea024722ebc 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceRegisterDefs.h @@ -17,7 +17,7 @@ #include "Delta.h" namespace llvm { -void reduceRegisterDefsMIRDeltaPass(TestRunner &Test); +void reduceRegisterDefsMIRDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git 
a/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.cpp b/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.cpp index 953e0e51afd82..f0c4cb991f7c7 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.cpp @@ -60,13 +60,10 @@ static void reduceMasksInFunction(Oracle &O, MachineFunction &MF) { } } -static void reduceMasksInModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceRegisterMasksMIRDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) reduceMasksInFunction(O, *MF); } } - -void llvm::reduceRegisterMasksMIRDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceMasksInModule, "Reducing register masks"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.h b/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.h index b8bb109e5c996..aee82a7c89214 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceRegisterMasks.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void reduceRegisterMasksMIRDeltaPass(TestRunner &Test); +void reduceRegisterMasksMIRDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.cpp b/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.cpp index a608935736d1a..6c07f13b52db3 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.cpp @@ -55,13 +55,10 @@ static void removeUsesFromFunction(Oracle &O, MachineFunction &MF) { } } -static void removeUsesFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceRegisterUsesMIRDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) removeUsesFromFunction(O, *MF); } } - -void 
llvm::reduceRegisterUsesMIRDeltaPass(TestRunner &Test) { - runDeltaPass(Test, removeUsesFromModule, "Reducing register uses"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.h b/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.h index 91ecba488f37c..40f6fcdb694af 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceRegisterUses.h @@ -17,7 +17,7 @@ #include "Delta.h" namespace llvm { -void reduceRegisterUsesMIRDeltaPass(TestRunner &Test); +void reduceRegisterUsesMIRDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.cpp b/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.cpp index aadd038033d5c..9a452d86c58a7 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #include "ReduceSpecialGlobals.h" -#include "Delta.h" #include "Utils.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constants.h" @@ -27,8 +26,7 @@ static StringRef SpecialGlobalNames[] = {"llvm.used", "llvm.compiler.used"}; /// Removes all special globals aren't inside any of the /// desired Chunks. 
-static void extractSpecialGlobalsFromModule(Oracle &O, - ReducerWorkItem &WorkItem) { +void llvm::reduceSpecialGlobalsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); for (StringRef Name : SpecialGlobalNames) { @@ -40,8 +38,3 @@ static void extractSpecialGlobalsFromModule(Oracle &O, } } } - -void llvm::reduceSpecialGlobalsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractSpecialGlobalsFromModule, - "Reducing Special Globals"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.h b/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.h index d17790529e06b..8332a2102df97 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.h @@ -20,7 +20,7 @@ #include "Delta.h" namespace llvm { -void reduceSpecialGlobalsDeltaPass(TestRunner &Test); +void reduceSpecialGlobalsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.cpp b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.cpp index c49fcb9855d41..ec37e248da8ed 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.cpp @@ -19,7 +19,8 @@ using namespace llvm; -static void reduceUsingSimplifyCFG(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::reduceUsingSimplifyCFGDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); SmallVector ToSimplify; for (auto &F : Program) @@ -31,9 +32,6 @@ static void reduceUsingSimplifyCFG(Oracle &O, ReducerWorkItem &WorkItem) { simplifyCFG(BB, TTI); } -void llvm::reduceUsingSimplifyCFGDeltaPass(TestRunner &Test) { - runDeltaPass(Test, reduceUsingSimplifyCFG, "Reducing using SimplifyCFG"); -} static void reduceConditionals(Oracle &O, ReducerWorkItem &WorkItem, bool Direction) { Module &M = WorkItem.getModule(); @@ -59,20 +57,12 @@ static void 
reduceConditionals(Oracle &O, ReducerWorkItem &WorkItem, simplifyCFG(BB, TTI); } -void llvm::reduceConditionalsTrueDeltaPass(TestRunner &Test) { - runDeltaPass( - Test, - [](Oracle &O, ReducerWorkItem &WorkItem) { - reduceConditionals(O, WorkItem, true); - }, - "Reducing conditional branches to true"); +void llvm::reduceConditionalsTrueDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { + reduceConditionals(O, WorkItem, true); } -void llvm::reduceConditionalsFalseDeltaPass(TestRunner &Test) { - runDeltaPass( - Test, - [](Oracle &O, ReducerWorkItem &WorkItem) { - reduceConditionals(O, WorkItem, false); - }, - "Reducing conditional branches to false"); +void llvm::reduceConditionalsFalseDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { + reduceConditionals(O, WorkItem, false); } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.h b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.h index 01a14602909b3..48dce275574e9 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceUsingSimplifyCFG.h @@ -17,9 +17,9 @@ #include "Delta.h" namespace llvm { -void reduceUsingSimplifyCFGDeltaPass(TestRunner &Test); -void reduceConditionalsTrueDeltaPass(TestRunner &Test); -void reduceConditionalsFalseDeltaPass(TestRunner &Test); +void reduceUsingSimplifyCFGDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); +void reduceConditionalsTrueDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); +void reduceConditionalsFalseDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp index 3ec9555c0f2f5..ed8121d99130a 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "ReduceVirtualRegisters.h" 
-#include "Delta.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -33,15 +32,10 @@ static void dropRegisterHintsFromFunction(Oracle &O, MachineFunction &MF) { } } -static void dropRegisterHintsFromFunctions(Oracle &O, - ReducerWorkItem &WorkItem) { +void llvm::reduceVirtualRegisterHintsDeltaPass(Oracle &O, + ReducerWorkItem &WorkItem) { for (const Function &F : WorkItem.getModule()) { if (auto *MF = WorkItem.MMI->getMachineFunction(F)) dropRegisterHintsFromFunction(O, *MF); } } - -void llvm::reduceVirtualRegisterHintsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, dropRegisterHintsFromFunctions, - "Reducing virtual register hints from functions"); -} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.h b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.h index 405ba31703e54..ff8ba4a004f34 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.h +++ b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.h @@ -14,11 +14,13 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEVIRTUALREGISTERS_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEVIRTUALREGISTERS_H +#include "Delta.h" + namespace llvm { class TestRunner; /// Remove register allocation hints from virtual registes. 
-void reduceVirtualRegisterHintsDeltaPass(TestRunner &Test); +void reduceVirtualRegisterHintsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm diff --git a/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp b/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp index f31c5d86dad1e..7d7355db15dd4 100644 --- a/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp +++ b/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "RunIRPasses.h" -#include "Delta.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -24,7 +23,7 @@ static cl::opt "simplifycfg,infer-address-spaces)"), cl::cat(LLVMReduceOptions)); -static void runPasses(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::runIRPassesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; @@ -47,7 +46,3 @@ static void runPasses(Oracle &O, ReducerWorkItem &WorkItem) { report_fatal_error(std::move(Err), false); MPM.run(Program, MAM); } - -void llvm::runIRPassesDeltaPass(TestRunner &Test) { - runDeltaPass(Test, runPasses, "Running passes"); -} diff --git a/llvm/tools/llvm-reduce/deltas/RunIRPasses.h b/llvm/tools/llvm-reduce/deltas/RunIRPasses.h index f1d4140d5b57f..bd7dd8adf1d89 100644 --- a/llvm/tools/llvm-reduce/deltas/RunIRPasses.h +++ b/llvm/tools/llvm-reduce/deltas/RunIRPasses.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void runIRPassesDeltaPass(TestRunner &Test); +void runIRPassesDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.cpp b/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.cpp index fc21593c5415c..7eb381d3c1905 100644 --- a/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.cpp +++ b/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.cpp @@ -19,7 
+19,7 @@ using namespace llvm; /// Calls simplifyInstruction in each instruction in functions, and replaces /// their values. -static void extractInstrFromModule(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::simplifyInstructionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { std::vector InstsToDelete; Module &Program = WorkItem.getModule(); @@ -44,7 +44,3 @@ static void extractInstrFromModule(Oracle &O, ReducerWorkItem &WorkItem) { for (Instruction *I : InstToDelete) I->eraseFromParent(); } - -void llvm::simplifyInstructionsDeltaPass(TestRunner &Test) { - runDeltaPass(Test, extractInstrFromModule, "Simplifying Instructions"); -} diff --git a/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.h b/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.h index 215cffcd4d12e..4e4b913e3d191 100644 --- a/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.h +++ b/llvm/tools/llvm-reduce/deltas/SimplifyInstructions.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void simplifyInstructionsDeltaPass(TestRunner &Test); +void simplifyInstructionsDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/StripDebugInfo.cpp b/llvm/tools/llvm-reduce/deltas/StripDebugInfo.cpp index c9e1261c366a7..c8077d20fdd29 100644 --- a/llvm/tools/llvm-reduce/deltas/StripDebugInfo.cpp +++ b/llvm/tools/llvm-reduce/deltas/StripDebugInfo.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "StripDebugInfo.h" -#include "Delta.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Metadata.h" @@ -15,7 +14,7 @@ using namespace llvm; /// Removes all aliases aren't inside any of the /// desired Chunks. 
-static void stripDebugInfoImpl(Oracle &O, ReducerWorkItem &WorkItem) { +void llvm::stripDebugInfoDeltaPass(Oracle &O, ReducerWorkItem &WorkItem) { Module &Program = WorkItem.getModule(); bool HasDebugInfo = any_of(Program.named_metadata(), [](NamedMDNode &NMD) { return NMD.getName().starts_with("llvm.dbg."); @@ -23,7 +22,3 @@ static void stripDebugInfoImpl(Oracle &O, ReducerWorkItem &WorkItem) { if (HasDebugInfo && !O.shouldKeep()) StripDebugInfo(Program); } - -void llvm::stripDebugInfoDeltaPass(TestRunner &Test) { - runDeltaPass(Test, stripDebugInfoImpl, "Stripping Debug Info"); -} diff --git a/llvm/tools/llvm-reduce/deltas/StripDebugInfo.h b/llvm/tools/llvm-reduce/deltas/StripDebugInfo.h index 56be459546e94..b88bb98ce668c 100644 --- a/llvm/tools/llvm-reduce/deltas/StripDebugInfo.h +++ b/llvm/tools/llvm-reduce/deltas/StripDebugInfo.h @@ -12,7 +12,7 @@ #include "Delta.h" namespace llvm { -void stripDebugInfoDeltaPass(TestRunner &Test); +void stripDebugInfoDeltaPass(Oracle &O, ReducerWorkItem &WorkItem); } // namespace llvm #endif diff --git a/llvm/tools/llvm-reduce/deltas/Utils.h b/llvm/tools/llvm-reduce/deltas/Utils.h index e94aee5a91153..8cb4a3ebaf644 100644 --- a/llvm/tools/llvm-reduce/deltas/Utils.h +++ b/llvm/tools/llvm-reduce/deltas/Utils.h @@ -13,11 +13,12 @@ #ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_UTILS_H #define LLVM_TOOLS_LLVM_REDUCE_DELTAS_UTILS_H -#include "llvm/IR/Function.h" -#include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" namespace llvm { +class Function; +class Type; +class Value; extern cl::opt Verbose; diff --git a/llvm/tools/llvm-xray/xray-stacks.cpp b/llvm/tools/llvm-xray/xray-stacks.cpp index aebca9048d4d4..cbf6faeb32960 100644 --- a/llvm/tools/llvm-xray/xray-stacks.cpp +++ b/llvm/tools/llvm-xray/xray-stacks.cpp @@ -267,15 +267,11 @@ static StackDuration mergeStackDuration(const StackDuration &Left, Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() + Right.IntermediateDurations.size()); // Aggregate the 
durations. - for (auto duration : Left.TerminalDurations) - Data.TerminalDurations.push_back(duration); - for (auto duration : Right.TerminalDurations) - Data.TerminalDurations.push_back(duration); - - for (auto duration : Left.IntermediateDurations) - Data.IntermediateDurations.push_back(duration); - for (auto duration : Right.IntermediateDurations) - Data.IntermediateDurations.push_back(duration); + llvm::append_range(Data.TerminalDurations, Left.TerminalDurations); + llvm::append_range(Data.TerminalDurations, Right.TerminalDurations); + + llvm::append_range(Data.IntermediateDurations, Left.IntermediateDurations); + llvm::append_range(Data.IntermediateDurations, Right.IntermediateDurations); return Data; } @@ -506,8 +502,7 @@ class StackTrie { for (const auto &RootNodeRange : make_range(map_iterator(Roots.begin(), MapValueFn), map_iterator(Roots.end(), MapValueFn))) { - for (auto *RootNode : RootNodeRange) - RootValues.push_back(RootNode); + llvm::append_range(RootValues, RootNodeRange); } print(OS, FN, RootValues); @@ -565,8 +560,7 @@ class StackTrie { while (!S.empty()) { auto *Top = S.pop_back_val(); printSingleStack(OS, FN, ReportThread, ThreadId, Top); - for (const auto *C : Top->Callees) - S.push_back(C); + llvm::append_range(S, Top->Callees); } } } @@ -641,8 +635,7 @@ class StackTrie { TopStacksByCount.pop_back(); } } - for (const auto *C : Top->Callees) - S.push_back(C); + llvm::append_range(S, Top->Callees); } } diff --git a/llvm/tools/lto/lto.cpp b/llvm/tools/lto/lto.cpp index b377693444189..29219c9114522 100644 --- a/llvm/tools/lto/lto.cpp +++ b/llvm/tools/lto/lto.cpp @@ -475,8 +475,7 @@ void lto_set_debug_options(const char *const *options, int number) { // Need to put each suboption in a null-terminated string before passing to // parseCommandLineOptions(). 
std::vector Options; - for (int i = 0; i < number; ++i) - Options.push_back(options[i]); + llvm::append_range(Options, ArrayRef(options, number)); llvm::parseCommandLineOptions(Options); optionParsingState = OptParsingState::Early; @@ -498,8 +497,7 @@ void lto_codegen_debug_options_array(lto_code_gen_t cg, assert(optionParsingState != OptParsingState::Early && "early option processing already happened"); SmallVector Options; - for (int i = 0; i < number; ++i) - Options.push_back(options[i]); + llvm::append_range(Options, ArrayRef(options, number)); unwrap(cg)->setCodeGenDebugOptions(ArrayRef(Options)); } diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index b1c8032ea2192..c56ed15501b40 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -1266,8 +1266,7 @@ ELFDumper::dumpSymtabShndxSection(const Elf_Shdr *Shdr) { return EntriesOrErr.takeError(); S->Entries.emplace(); - for (const Elf_Word &E : *EntriesOrErr) - S->Entries->push_back(E); + llvm::append_range(*S->Entries, *EntriesOrErr); return S.release(); } @@ -1490,8 +1489,7 @@ ELFDumper::dumpSymverSection(const Elf_Shdr *Shdr) { return VersionsOrErr.takeError(); S->Entries.emplace(); - for (const Elf_Half &E : *VersionsOrErr) - S->Entries->push_back(E); + llvm::append_range(*S->Entries, *VersionsOrErr); return S.release(); } diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp index cdd871e8c1d68..00220123e8189 100644 --- a/llvm/tools/obj2yaml/macho2yaml.cpp +++ b/llvm/tools/obj2yaml/macho2yaml.cpp @@ -364,8 +364,7 @@ void MachODumper::dumpFunctionStarts(std::unique_ptr &Y) { MachOYAML::LinkEditData &LEData = Y->LinkEdit; auto FunctionStarts = Obj.getFunctionStarts(); - for (auto Addr : FunctionStarts) - LEData.FunctionStarts.push_back(Addr); + llvm::append_range(LEData.FunctionStarts, FunctionStarts); } void MachODumper::dumpRebaseOpcodes(std::unique_ptr &Y) { @@ -637,9 +636,7 @@ void 
MachODumper::dumpChainedFixups(std::unique_ptr &Y) { assert(DC.dataoff < Obj.getData().size()); assert(DC.dataoff + DC.datasize <= Obj.getData().size()); const char *Bytes = Obj.getData().data() + DC.dataoff; - for (size_t Idx = 0; Idx < DC.datasize; Idx++) { - LEData.ChainedFixups.push_back(Bytes[Idx]); - } + llvm::append_range(LEData.ChainedFixups, ArrayRef(Bytes, DC.datasize)); } break; } diff --git a/llvm/unittests/ADT/DenseMapTest.cpp b/llvm/unittests/ADT/DenseMapTest.cpp index d1bbdde8dfc26..a4c045585fc28 100644 --- a/llvm/unittests/ADT/DenseMapTest.cpp +++ b/llvm/unittests/ADT/DenseMapTest.cpp @@ -379,6 +379,28 @@ TEST(DenseMapCustomTest, EqualityComparison) { EXPECT_NE(M1, M3); } +TEST(DenseMapCustomTest, InsertRange) { + DenseMap M; + + std::pair InputVals[3] = {{0, 0}, {0, 1}, {1, 2}}; + M.insert_range(InputVals); + + EXPECT_EQ(M.size(), 2u); + EXPECT_THAT(M, testing::UnorderedElementsAre(testing::Pair(0, 0), + testing::Pair(1, 2))); +} + +TEST(SmallDenseMapCustomTest, InsertRange) { + SmallDenseMap M; + + std::pair InputVals[3] = {{0, 0}, {0, 1}, {1, 2}}; + M.insert_range(InputVals); + + EXPECT_EQ(M.size(), 2u); + EXPECT_THAT(M, testing::UnorderedElementsAre(testing::Pair(0, 0), + testing::Pair(1, 2))); +} + // Test for the default minimum size of a DenseMap TEST(DenseMapCustomTest, DefaultMinReservedSizeTest) { // IF THIS VALUE CHANGE, please update InitialSizeTest, InitFromIterator, and diff --git a/llvm/unittests/ADT/DenseSetTest.cpp b/llvm/unittests/ADT/DenseSetTest.cpp index 5a8ee592ddfc7..a24f99b6bb34f 100644 --- a/llvm/unittests/ADT/DenseSetTest.cpp +++ b/llvm/unittests/ADT/DenseSetTest.cpp @@ -58,6 +58,13 @@ TEST(DenseSetTest, InsertRange) { EXPECT_THAT(set, ::testing::UnorderedElementsAre(1, 2, 3)); } +TEST(SmallDenseSetTest, InsertRange) { + llvm::SmallDenseSet set; + constexpr unsigned Args[] = {9, 7, 8}; + set.insert_range(Args); + EXPECT_THAT(set, ::testing::UnorderedElementsAre(7, 8, 9)); +} + struct TestDenseSetInfo { static inline 
unsigned getEmptyKey() { return ~0; } static inline unsigned getTombstoneKey() { return ~0U - 1; } diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index d019823a5548d..8748371ae4b47 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -501,6 +501,40 @@ TEST(DIBuilder, DIEnumerator) { EXPECT_FALSE(E2); } +TEST(DIBuilder, FixedPointType) { + LLVMContext Ctx; + std::unique_ptr M(new Module("MyModule", Ctx)); + DIBuilder DIB(*M); + + DIFixedPointType *Ty = DIB.createBinaryFixedPointType( + {}, 32, 0, dwarf::DW_ATE_signed_fixed, DINode::FlagZero, -4); + EXPECT_TRUE(Ty); + EXPECT_TRUE(Ty->getKind() == DIFixedPointType::FixedPointBinary); + EXPECT_TRUE(Ty->getFactor() == -4); + EXPECT_TRUE(Ty->getEncoding() == dwarf::DW_ATE_signed_fixed); + EXPECT_TRUE(Ty->getTag() == dwarf::DW_TAG_base_type); + + Ty = DIB.createDecimalFixedPointType({}, 32, 0, dwarf::DW_ATE_unsigned_fixed, + DINode::FlagZero, -7); + EXPECT_TRUE(Ty); + EXPECT_TRUE(Ty->getKind() == DIFixedPointType::FixedPointDecimal); + EXPECT_TRUE(Ty->getFactor() == -7); + EXPECT_TRUE(Ty->getEncoding() == dwarf::DW_ATE_unsigned_fixed); + EXPECT_TRUE(Ty->getTag() == dwarf::DW_TAG_base_type); + + APSInt Num(APInt(32, 1)); + APSInt Denom(APInt(33, 72)); + Ty = DIB.createRationalFixedPointType({}, 32, 0, dwarf::DW_ATE_unsigned_fixed, + DINode::FlagZero, Num, Denom); + EXPECT_TRUE(Ty); + EXPECT_TRUE(Ty->getKind() == DIFixedPointType::FixedPointRational); + EXPECT_TRUE(Ty->getFactorRaw() == 0); + EXPECT_TRUE(Ty->getNumerator() == Num); + EXPECT_TRUE(Ty->getDenominator() == Denom); + EXPECT_TRUE(Ty->getEncoding() == dwarf::DW_ATE_unsigned_fixed); + EXPECT_TRUE(Ty->getTag() == dwarf::DW_TAG_base_type); +} + TEST(DbgAssignIntrinsicTest, replaceVariableLocationOp) { LLVMContext C; std::unique_ptr M = parseIR(C, R"( diff --git a/llvm/unittests/tools/llvm-exegesis/AArch64/TargetTest.cpp b/llvm/unittests/tools/llvm-exegesis/AArch64/TargetTest.cpp index 
71675d9f46739..ca5416eef39d5 100644 --- a/llvm/unittests/tools/llvm-exegesis/AArch64/TargetTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/AArch64/TargetTest.cpp @@ -65,7 +65,7 @@ TEST_F(AArch64TargetTest, SetRegToConstant) { } TEST_F(AArch64TargetTest, DefaultPfmCounters) { - const std::string Expected = "CPU_CYCLES"; + const std::string Expected = "CYCLES"; EXPECT_EQ(ExegesisTarget_->getPfmCounters("").CycleCounter, Expected); EXPECT_EQ(ExegesisTarget_->getPfmCounters("unknown_cpu").CycleCounter, Expected); diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 0b553c3a3d456..0364b02c2483d 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -113,9 +113,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { ParamTypeRecs.push_back(R->getValueAsDef("result")); - for (const Record *ArgTy : R->getValueAsListOfDefs("arguments")) { - ParamTypeRecs.push_back(ArgTy); - } + llvm::append_range(ParamTypeRecs, R->getValueAsListOfDefs("arguments")); size_t ParamTypeRecsSize = ParamTypeRecs.size(); // Populate OpTypes with return type and parameter types @@ -148,9 +146,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { // Sort records in ascending order of DXIL version ascendingSortByVersion(Recs); - for (const Record *CR : Recs) { - OverloadRecs.push_back(CR); - } + llvm::append_range(OverloadRecs, Recs); // Get stage records Recs = R->getValueAsListOfDefs("stages"); @@ -163,9 +159,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { // Sort records in ascending order of DXIL version ascendingSortByVersion(Recs); - for (const Record *CR : Recs) { - StageRecs.push_back(CR); - } + llvm::append_range(StageRecs, Recs); // Get attribute records Recs = R->getValueAsListOfDefs("attributes"); @@ -173,9 +167,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { // Sort records in ascending order of DXIL version ascendingSortByVersion(Recs); - for (const Record *CR : Recs) { 
- AttrRecs.push_back(CR); - } + llvm::append_range(AttrRecs, Recs); // Get the operation class OpClass = R->getValueAsDef("OpClass")->getName(); diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index e1344ae54b20e..cf7c02db8842e 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -1342,8 +1342,7 @@ void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, TableInfo.Table.push_back(MCD::OPC_CheckPredicate); // Predicate index. - for (const auto PB : PBytes) - TableInfo.Table.push_back(PB); + llvm::append_range(TableInfo.Table, PBytes); // Push location for NumToSkip backpatching. TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); TableInfo.Table.push_back(0); @@ -1402,15 +1401,13 @@ void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, raw_svector_ostream S(MaskBytes); if (NeedPositiveMask) { encodeULEB128(PositiveMask.getZExtValue(), S); - for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) - TableInfo.Table.push_back(MaskBytes[i]); + llvm::append_range(TableInfo.Table, MaskBytes); } else TableInfo.Table.push_back(0); if (NeedNegativeMask) { MaskBytes.clear(); encodeULEB128(NegativeMask.getZExtValue(), S); - for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) - TableInfo.Table.push_back(MaskBytes[i]); + llvm::append_range(TableInfo.Table, MaskBytes); } else TableInfo.Table.push_back(0); } @@ -1483,8 +1480,7 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, encodeULEB128(DIdx, S); // Decoder index. - for (const auto B : Bytes) - TableInfo.Table.push_back(B); + llvm::append_range(TableInfo.Table, Bytes); if (!HasCompleteDecoder) { // Push location for NumToSkip backpatching. 
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp index 5e7983a101e0b..36f752a1ebe63 100644 --- a/llvm/utils/TableGen/X86DisassemblerTables.cpp +++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp @@ -746,8 +746,7 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2, ModRMDecision.push_back(decision.instructionIDs[index]); break; case MODRM_FULL: - for (unsigned short InstructionID : decision.instructionIDs) - ModRMDecision.push_back(InstructionID); + llvm::append_range(ModRMDecision, decision.instructionIDs); break; } diff --git a/llvm/utils/not/not.cpp b/llvm/utils/not/not.cpp index 6ba59190d8ada..6f270cb0f7783 100644 --- a/llvm/utils/not/not.cpp +++ b/llvm/utils/not/not.cpp @@ -57,10 +57,7 @@ int main(int argc, const char **argv) { return 1; } - std::vector Argv; - Argv.reserve(argc); - for (int i = 0; i < argc; ++i) - Argv.push_back(argv[i]); + SmallVector Argv(ArrayRef(argv, argc)); std::string ErrMsg; int Result = sys::ExecuteAndWait(*Program, Argv, std::nullopt, {}, 0, 0, &ErrMsg); diff --git a/mlir/include/mlir/Dialect/MPI/IR/MPI.td b/mlir/include/mlir/Dialect/MPI/IR/MPI.td index 7c84443e5520d..f2837e71df060 100644 --- a/mlir/include/mlir/Dialect/MPI/IR/MPI.td +++ b/mlir/include/mlir/Dialect/MPI/IR/MPI.td @@ -246,12 +246,7 @@ def MPI_OpClassEnum : I32EnumAttr<"MPI_OpClassEnum", "MPI operation class", [ MPI_OpMaxloc, MPI_OpReplace ]> { - let genSpecializedAttr = 0; let cppNamespace = "::mlir::mpi"; } -def MPI_OpClassAttr : EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - #endif // MLIR_DIALECT_MPI_IR_MPI_TD diff --git a/mlir/include/mlir/Dialect/MPI/IR/MPIOps.td b/mlir/include/mlir/Dialect/MPI/IR/MPIOps.td index db28bd09678f8..a8267b115b9e6 100644 --- a/mlir/include/mlir/Dialect/MPI/IR/MPIOps.td +++ b/mlir/include/mlir/Dialect/MPI/IR/MPIOps.td @@ -244,7 +244,7 @@ def MPI_AllReduceOp : MPI_Op<"allreduce", []> { let arguments = ( ins AnyMemRef : $sendbuf, AnyMemRef : 
$recvbuf, - MPI_OpClassAttr : $op + MPI_OpClassEnum : $op ); let results = (outs Optional:$retval); diff --git a/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp b/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp index d91f9512ccb8f..4e0f59305a647 100644 --- a/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp +++ b/mlir/lib/Conversion/MPIToLLVM/MPIToLLVM.cpp @@ -47,6 +47,22 @@ static LLVM::LLVMFuncOp getOrDefineFunction(ModuleOp &moduleOp, moduleOp, loc, rewriter, name, name, type, LLVM::Linkage::External); } +std::pair getRawPtrAndSize(const Location loc, + ConversionPatternRewriter &rewriter, + Value memRef, Type elType) { + Type ptrType = LLVM::LLVMPointerType::get(rewriter.getContext()); + Value dataPtr = + rewriter.create(loc, ptrType, memRef, 1); + Value offset = rewriter.create( + loc, rewriter.getI64Type(), memRef, 2); + Value resPtr = + rewriter.create(loc, ptrType, elType, dataPtr, offset); + Value size = rewriter.create(loc, memRef, + ArrayRef{3, 0}); + size = rewriter.create(loc, rewriter.getI32Type(), size); + return {resPtr, size}; +} + /// When lowering the mpi dialect to functions calls certain details /// differ between various MPI implementations. This class will provide /// these in a generic way, depending on the MPI implementation that got @@ -77,6 +93,12 @@ class MPIImplTraits { /// type. virtual Value getDataType(const Location loc, ConversionPatternRewriter &rewriter, Type type) = 0; + + /// Gets or creates an MPI_Op value which corresponds to the given + /// enum value. 
+ virtual Value getMPIOp(const Location loc, + ConversionPatternRewriter &rewriter, + mpi::MPI_OpClassEnum opAttr) = 0; }; //===----------------------------------------------------------------------===// @@ -94,6 +116,20 @@ class MPICHImplTraits : public MPIImplTraits { static constexpr int MPI_UINT16_T = 0x4c00023c; static constexpr int MPI_UINT32_T = 0x4c00043d; static constexpr int MPI_UINT64_T = 0x4c00083e; + static constexpr int MPI_MAX = 0x58000001; + static constexpr int MPI_MIN = 0x58000002; + static constexpr int MPI_SUM = 0x58000003; + static constexpr int MPI_PROD = 0x58000004; + static constexpr int MPI_LAND = 0x58000005; + static constexpr int MPI_BAND = 0x58000006; + static constexpr int MPI_LOR = 0x58000007; + static constexpr int MPI_BOR = 0x58000008; + static constexpr int MPI_LXOR = 0x58000009; + static constexpr int MPI_BXOR = 0x5800000a; + static constexpr int MPI_MINLOC = 0x5800000b; + static constexpr int MPI_MAXLOC = 0x5800000c; + static constexpr int MPI_REPLACE = 0x5800000d; + static constexpr int MPI_NO_OP = 0x5800000e; public: using MPIImplTraits::MPIImplTraits; @@ -136,6 +172,56 @@ class MPICHImplTraits : public MPIImplTraits { assert(false && "unsupported type"); return rewriter.create(loc, rewriter.getI32Type(), mtype); } + + Value getMPIOp(const Location loc, ConversionPatternRewriter &rewriter, + mpi::MPI_OpClassEnum opAttr) override { + int32_t op = MPI_NO_OP; + switch (opAttr) { + case mpi::MPI_OpClassEnum::MPI_OP_NULL: + op = MPI_NO_OP; + break; + case mpi::MPI_OpClassEnum::MPI_MAX: + op = MPI_MAX; + break; + case mpi::MPI_OpClassEnum::MPI_MIN: + op = MPI_MIN; + break; + case mpi::MPI_OpClassEnum::MPI_SUM: + op = MPI_SUM; + break; + case mpi::MPI_OpClassEnum::MPI_PROD: + op = MPI_PROD; + break; + case mpi::MPI_OpClassEnum::MPI_LAND: + op = MPI_LAND; + break; + case mpi::MPI_OpClassEnum::MPI_BAND: + op = MPI_BAND; + break; + case mpi::MPI_OpClassEnum::MPI_LOR: + op = MPI_LOR; + break; + case mpi::MPI_OpClassEnum::MPI_BOR: + op = 
MPI_BOR; + break; + case mpi::MPI_OpClassEnum::MPI_LXOR: + op = MPI_LXOR; + break; + case mpi::MPI_OpClassEnum::MPI_BXOR: + op = MPI_BXOR; + break; + case mpi::MPI_OpClassEnum::MPI_MINLOC: + op = MPI_MINLOC; + break; + case mpi::MPI_OpClassEnum::MPI_MAXLOC: + op = MPI_MAXLOC; + break; + case mpi::MPI_OpClassEnum::MPI_REPLACE: + op = MPI_REPLACE; + break; + } + return rewriter.create(loc, rewriter.getI32Type(), op); + } }; //===----------------------------------------------------------------------===// @@ -205,15 +291,74 @@ class OMPIImplTraits : public MPIImplTraits { auto context = rewriter.getContext(); // get external opaque struct pointer type - auto commStructT = + auto typeStructT = LLVM::LLVMStructType::getOpaque("ompi_predefined_datatype_t", context); // make sure global op definition exists - getOrDefineExternalStruct(loc, rewriter, mtype, commStructT); + getOrDefineExternalStruct(loc, rewriter, mtype, typeStructT); // get address of symbol return rewriter.create( loc, LLVM::LLVMPointerType::get(context), SymbolRefAttr::get(context, mtype)); } + + Value getMPIOp(const Location loc, ConversionPatternRewriter &rewriter, + mpi::MPI_OpClassEnum opAttr) override { + StringRef op; + switch (opAttr) { + case mpi::MPI_OpClassEnum::MPI_OP_NULL: + op = "ompi_mpi_no_op"; + break; + case mpi::MPI_OpClassEnum::MPI_MAX: + op = "ompi_mpi_max"; + break; + case mpi::MPI_OpClassEnum::MPI_MIN: + op = "ompi_mpi_min"; + break; + case mpi::MPI_OpClassEnum::MPI_SUM: + op = "ompi_mpi_sum"; + break; + case mpi::MPI_OpClassEnum::MPI_PROD: + op = "ompi_mpi_prod"; + break; + case mpi::MPI_OpClassEnum::MPI_LAND: + op = "ompi_mpi_land"; + break; + case mpi::MPI_OpClassEnum::MPI_BAND: + op = "ompi_mpi_band"; + break; + case mpi::MPI_OpClassEnum::MPI_LOR: + op = "ompi_mpi_lor"; + break; + case mpi::MPI_OpClassEnum::MPI_BOR: + op = "ompi_mpi_bor"; + break; + case mpi::MPI_OpClassEnum::MPI_LXOR: + op = "ompi_mpi_lxor"; + break; + case mpi::MPI_OpClassEnum::MPI_BXOR: + op = "ompi_mpi_bxor"; 
+ break; + case mpi::MPI_OpClassEnum::MPI_MINLOC: + op = "ompi_mpi_minloc"; + break; + case mpi::MPI_OpClassEnum::MPI_MAXLOC: + op = "ompi_mpi_maxloc"; + break; + case mpi::MPI_OpClassEnum::MPI_REPLACE: + op = "ompi_mpi_replace"; + break; + } + auto context = rewriter.getContext(); + // get external opaque struct pointer type + auto opStructT = + LLVM::LLVMStructType::getOpaque("ompi_predefined_op_t", context); + // make sure global op definition exists + getOrDefineExternalStruct(loc, rewriter, op, opStructT); + // get address of symbol + return rewriter.create( + loc, LLVM::LLVMPointerType::get(context), + SymbolRefAttr::get(context, op)); + } }; std::unique_ptr MPIImplTraits::get(ModuleOp &moduleOp) { @@ -365,8 +510,6 @@ struct SendOpLowering : public ConvertOpToLLVMPattern { Location loc = op.getLoc(); MLIRContext *context = rewriter.getContext(); Type i32 = rewriter.getI32Type(); - Type i64 = rewriter.getI64Type(); - Value memRef = adaptor.getRef(); Type elemType = op.getRef().getType().getElementType(); // ptrType `!llvm.ptr` @@ -376,14 +519,8 @@ struct SendOpLowering : public ConvertOpToLLVMPattern { auto moduleOp = op->getParentOfType(); // get MPI_COMM_WORLD, dataType and pointer - Value dataPtr = - rewriter.create(loc, ptrType, memRef, 1); - Value offset = rewriter.create(loc, i64, memRef, 2); - dataPtr = - rewriter.create(loc, ptrType, elemType, dataPtr, offset); - Value size = rewriter.create(loc, memRef, - ArrayRef{3, 0}); - size = rewriter.create(loc, i32, size); + auto [dataPtr, size] = + getRawPtrAndSize(loc, rewriter, adaptor.getRef(), elemType); auto mpiTraits = MPIImplTraits::get(moduleOp); Value dataType = mpiTraits->getDataType(loc, rewriter, elemType); Value commWorld = mpiTraits->getCommWorld(loc, rewriter); @@ -425,7 +562,6 @@ struct RecvOpLowering : public ConvertOpToLLVMPattern { MLIRContext *context = rewriter.getContext(); Type i32 = rewriter.getI32Type(); Type i64 = rewriter.getI64Type(); - Value memRef = adaptor.getRef(); Type elemType 
= op.getRef().getType().getElementType(); // ptrType `!llvm.ptr` @@ -435,14 +571,8 @@ struct RecvOpLowering : public ConvertOpToLLVMPattern { auto moduleOp = op->getParentOfType(); // get MPI_COMM_WORLD, dataType, status_ignore and pointer - Value dataPtr = - rewriter.create(loc, ptrType, memRef, 1); - Value offset = rewriter.create(loc, i64, memRef, 2); - dataPtr = - rewriter.create(loc, ptrType, elemType, dataPtr, offset); - Value size = rewriter.create(loc, memRef, - ArrayRef{3, 0}); - size = rewriter.create(loc, i32, size); + auto [dataPtr, size] = + getRawPtrAndSize(loc, rewriter, adaptor.getRef(), elemType); auto mpiTraits = MPIImplTraits::get(moduleOp); Value dataType = mpiTraits->getDataType(loc, rewriter, elemType); Value commWorld = mpiTraits->getCommWorld(loc, rewriter); @@ -474,6 +604,55 @@ struct RecvOpLowering : public ConvertOpToLLVMPattern { } }; +//===----------------------------------------------------------------------===// +// AllReduceOpLowering +//===----------------------------------------------------------------------===// + +struct AllReduceOpLowering : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(mpi::AllReduceOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + MLIRContext *context = rewriter.getContext(); + Type i32 = rewriter.getI32Type(); + Type elemType = op.getSendbuf().getType().getElementType(); + + // ptrType `!llvm.ptr` + Type ptrType = LLVM::LLVMPointerType::get(context); + auto moduleOp = op->getParentOfType(); + auto mpiTraits = MPIImplTraits::get(moduleOp); + auto [sendPtr, sendSize] = + getRawPtrAndSize(loc, rewriter, adaptor.getSendbuf(), elemType); + auto [recvPtr, recvSize] = + getRawPtrAndSize(loc, rewriter, adaptor.getRecvbuf(), elemType); + Value dataType = mpiTraits->getDataType(loc, rewriter, elemType); + Value mpiOp = mpiTraits->getMPIOp(loc, rewriter, op.getOp()); + Value 
commWorld = mpiTraits->getCommWorld(loc, rewriter); + // 'int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, + // MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)' + auto funcType = LLVM::LLVMFunctionType::get( + i32, {ptrType, ptrType, i32, dataType.getType(), mpiOp.getType(), + commWorld.getType()}); + // get or create function declaration: + LLVM::LLVMFuncOp funcDecl = + getOrDefineFunction(moduleOp, loc, rewriter, "MPI_Allreduce", funcType); + + // replace op with function call + auto funcCall = rewriter.create( + loc, funcDecl, + ValueRange{sendPtr, recvPtr, sendSize, dataType, mpiOp, commWorld}); + + if (op.getRetval()) + rewriter.replaceOp(op, funcCall.getResult()); + else + rewriter.eraseOp(op); + + return success(); + } +}; + //===----------------------------------------------------------------------===// // ConvertToLLVMPatternInterface implementation //===----------------------------------------------------------------------===// @@ -498,7 +677,7 @@ struct FuncToLLVMDialectInterface : public ConvertToLLVMPatternInterface { void mpi::populateMPIToLLVMConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns) { patterns.add(converter); + SendOpLowering, RecvOpLowering, AllReduceOpLowering>(converter); } void mpi::registerConvertMPIToLLVMInterface(DialectRegistry ®istry) { diff --git a/mlir/lib/Dialect/SPIRV/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/SPIRV/IR/ControlFlowOps.cpp index bcfd7ebccd12d..2959d67b366b9 100644 --- a/mlir/lib/Dialect/SPIRV/IR/ControlFlowOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/ControlFlowOps.cpp @@ -255,8 +255,7 @@ static bool hasOneBranchOpTo(Block &srcBlock, Block &dstBlock) { /// Returns true if the given `block` only contains one `spirv.mlir.merge` op. 
static bool isMergeBlock(Block &block) { - return !block.empty() && std::next(block.begin()) == block.end() && - isa<spirv::MergeOp>(block.front()); + return llvm::hasSingleElement(block) && isa<spirv::MergeOp>(block.front()); } /// Returns true if a `spirv.mlir.merge` op outside the merge block. diff --git a/mlir/test/Conversion/MPIToLLVM/ops.mlir b/mlir/test/Conversion/MPIToLLVM/mpitollvm.mlir similarity index 78% rename from mlir/test/Conversion/MPIToLLVM/ops.mlir rename to mlir/test/Conversion/MPIToLLVM/mpitollvm.mlir index 3c1b344efd50b..b630ce3a23f30 100644 --- a/mlir/test/Conversion/MPIToLLVM/ops.mlir +++ b/mlir/test/Conversion/MPIToLLVM/mpitollvm.mlir @@ -1,13 +1,13 @@ // RUN: mlir-opt -split-input-file -convert-to-llvm %s | FileCheck %s // COM: Test MPICH ABI -// CHECK: module attributes {mpi.dlti = #dlti.map<"MPI:Implementation" = "MPICH">} { +// CHECK: module attributes {dlti.map = #dlti.map<"MPI:Implementation" = "MPICH">} { // CHECK: llvm.func @MPI_Finalize() -> i32 // CHECK: llvm.func @MPI_Recv(!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 // CHECK: llvm.func @MPI_Send(!llvm.ptr, i32, i32, i32, i32, i32) -> i32 // CHECK: llvm.func @MPI_Comm_rank(i32, !llvm.ptr) -> i32 // CHECK: llvm.func @MPI_Init(!llvm.ptr, !llvm.ptr) -> i32 -module attributes { mpi.dlti = #dlti.map<"MPI:Implementation" = "MPICH"> } { +module attributes {dlti.map = #dlti.map<"MPI:Implementation" = "MPICH">} { // CHECK: llvm.func @mpi_test_mpich([[varg0:%.+]]: !llvm.ptr, [[varg1:%.+]]: !llvm.ptr, [[varg2:%.+]]: i64, [[varg3:%.+]]: i64, [[varg4:%.+]]: i64) { func.func @mpi_test_mpich(%arg0: memref<100xf32>) { @@ -73,7 +73,23 @@ module attributes { mpi.dlti = #dlti.map<"MPI:Implementation" = "MPICH"> } { // CHECK: [[v48:%.*]] = llvm.call @MPI_Recv([[v41]], [[v43]], [[v44]], [[v12]], [[v12]], [[v45]], [[v47]]) : (!llvm.ptr, i32, i32, i32, i32, i32, !llvm.ptr) -> i32 %2 = mpi.recv(%arg0, %rank, %rank) : memref<100xf32>, i32, i32 -> !mpi.retval - // CHECK: [[v49:%.*]] = llvm.call @MPI_Finalize() : () -> i32 + // 
CHECK: [[v49:%.*]] = llvm.extractvalue [[v5]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v50:%.*]] = llvm.extractvalue [[v5]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v51:%.*]] = llvm.getelementptr [[v49]][[[v50]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 + // CHECK: [[v52:%.*]] = llvm.extractvalue [[v5]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v53:%.*]] = llvm.trunc [[v52]] : i64 to i32 + // CHECK: [[v54:%.*]] = llvm.extractvalue [[v5]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v55:%.*]] = llvm.extractvalue [[v5]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v56:%.*]] = llvm.getelementptr [[v54]][[[v55]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 + // CHECK: [[v57:%.*]] = llvm.extractvalue [[v5]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v58:%.*]] = llvm.trunc [[v57]] : i64 to i32 + // CHECK: [[v59:%.*]] = llvm.mlir.constant(1275069450 : i32) : i32 + // CHECK: [[v60:%.*]] = llvm.mlir.constant(1476395011 : i32) : i32 + // CHECK: [[v61:%.*]] = llvm.mlir.constant(1140850688 : i32) : i32 + // CHECK: [[v62:%.*]] = llvm.call @MPI_Allreduce([[v51]], [[v56]], [[v53]], [[v59]], [[v60]], [[v61]]) : (!llvm.ptr, !llvm.ptr, i32, i32, i32, i32) -> i32 + mpi.allreduce(%arg0, %arg0, MPI_SUM) : memref<100xf32>, memref<100xf32> + + // CHECK: llvm.call @MPI_Finalize() : () -> i32 %3 = mpi.finalize : !mpi.retval return @@ -83,7 +99,7 @@ module attributes { mpi.dlti = #dlti.map<"MPI:Implementation" = "MPICH"> } { // ----- // COM: Test OpenMPI ABI -// CHECK: module attributes {mpi.dlti = #dlti.map<"MPI:Implementation" = "OpenMPI">} { +// CHECK: module attributes {dlti.map = #dlti.map<"MPI:Implementation" = "OpenMPI">} { // CHECK: llvm.func @MPI_Finalize() -> i32 // CHECK: llvm.func @MPI_Recv(!llvm.ptr, i32, !llvm.ptr, i32, i32, !llvm.ptr, !llvm.ptr) -> i32 // 
CHECK: llvm.func @MPI_Send(!llvm.ptr, i32, !llvm.ptr, i32, i32, !llvm.ptr) -> i32 @@ -91,7 +107,7 @@ module attributes { mpi.dlti = #dlti.map<"MPI:Implementation" = "MPICH"> } { // CHECK: llvm.func @MPI_Comm_rank(!llvm.ptr, !llvm.ptr) -> i32 // CHECK: llvm.mlir.global external @ompi_mpi_comm_world() {addr_space = 0 : i32} : !llvm.struct<"ompi_communicator_t", opaque> // CHECK: llvm.func @MPI_Init(!llvm.ptr, !llvm.ptr) -> i32 -module attributes { mpi.dlti = #dlti.map<"MPI:Implementation" = "OpenMPI"> } { +module attributes { dlti.map = #dlti.map<"MPI:Implementation" = "OpenMPI"> } { // CHECK: llvm.func @mpi_test_openmpi([[varg0:%.+]]: !llvm.ptr, [[varg1:%.+]]: !llvm.ptr, [[varg2:%.+]]: i64, [[varg3:%.+]]: i64, [[varg4:%.+]]: i64) { func.func @mpi_test_openmpi(%arg0: memref<100xf32>) { @@ -157,6 +173,22 @@ module attributes { mpi.dlti = #dlti.map<"MPI:Implementation" = "OpenMPI"> } { // CHECK: [[v48:%.*]] = llvm.call @MPI_Recv([[v41]], [[v43]], [[v44]], [[v12]], [[v12]], [[v45]], [[v47]]) : (!llvm.ptr, i32, !llvm.ptr, i32, i32, !llvm.ptr, !llvm.ptr) -> i32 %2 = mpi.recv(%arg0, %rank, %rank) : memref<100xf32>, i32, i32 -> !mpi.retval + // CHECK: [[v49:%.*]] = llvm.extractvalue [[v5]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v50:%.*]] = llvm.extractvalue [[v5]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v51:%.*]] = llvm.getelementptr [[v49]][[[v50]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 + // CHECK: [[v52:%.*]] = llvm.extractvalue [[v5]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v53:%.*]] = llvm.trunc [[v52]] : i64 to i32 + // CHECK: [[v54:%.*]] = llvm.extractvalue [[v5]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v55:%.*]] = llvm.extractvalue [[v5]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v56:%.*]] = llvm.getelementptr [[v54]][[[v55]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 + 
// CHECK: [[v57:%.*]] = llvm.extractvalue [[v5]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> + // CHECK: [[v58:%.*]] = llvm.trunc [[v57]] : i64 to i32 + // CHECK: [[v59:%.*]] = llvm.mlir.addressof @ompi_mpi_float : !llvm.ptr + // CHECK: [[v60:%.*]] = llvm.mlir.addressof @ompi_mpi_sum : !llvm.ptr + // CHECK: [[v61:%.*]] = llvm.mlir.addressof @ompi_mpi_comm_world : !llvm.ptr + // CHECK: [[v62:%.*]] = llvm.call @MPI_Allreduce([[v51]], [[v56]], [[v53]], [[v59]], [[v60]], [[v61]]) : (!llvm.ptr, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> i32 + mpi.allreduce(%arg0, %arg0, MPI_SUM) : memref<100xf32>, memref<100xf32> + // CHECK: [[v49:%.*]] = llvm.call @MPI_Finalize() : () -> i32 %3 = mpi.finalize : !mpi.retval diff --git a/mlir/test/Dialect/MPI/ops.mlir b/mlir/test/Dialect/MPI/mpiops.mlir similarity index 88% rename from mlir/test/Dialect/MPI/ops.mlir rename to mlir/test/Dialect/MPI/mpiops.mlir index f23a7e18a2ee9..fb4333611a246 100644 --- a/mlir/test/Dialect/MPI/ops.mlir +++ b/mlir/test/Dialect/MPI/mpiops.mlir @@ -48,11 +48,11 @@ func.func @mpi_test(%ref : memref<100xf32>) -> () { // CHECK-NEXT: %5 = mpi.barrier : !mpi.retval %err7 = mpi.barrier : !mpi.retval - // CHECK-NEXT: mpi.allreduce(%arg0, %arg0, ) : memref<100xf32>, memref<100xf32> - mpi.allreduce(%ref, %ref, ) : memref<100xf32>, memref<100xf32> + // CHECK-NEXT: mpi.allreduce(%arg0, %arg0, MPI_SUM) : memref<100xf32>, memref<100xf32> + mpi.allreduce(%ref, %ref, MPI_SUM) : memref<100xf32>, memref<100xf32> - // CHECK-NEXT: mpi.allreduce(%arg0, %arg0, ) : memref<100xf32>, memref<100xf32> -> !mpi.retval - %err8 = mpi.allreduce(%ref, %ref, ) : memref<100xf32>, memref<100xf32> -> !mpi.retval + // CHECK-NEXT: mpi.allreduce(%arg0, %arg0, MPI_SUM) : memref<100xf32>, memref<100xf32> -> !mpi.retval + %err8 = mpi.allreduce(%ref, %ref, MPI_SUM) : memref<100xf32>, memref<100xf32> -> !mpi.retval // CHECK-NEXT: %7 = mpi.finalize : !mpi.retval %rval = mpi.finalize : !mpi.retval diff --git 
a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h index 6641ac9a0c068..5a111e9a2cb7c 100644 --- a/polly/include/polly/CodeGen/IRBuilder.h +++ b/polly/include/polly/CodeGen/IRBuilder.h @@ -79,7 +79,7 @@ class ScopAnnotator { void addAlternativeAliasBases( llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::AssertingVH<llvm::Value>> &NewMap) { - AlternativeAliasBases.insert(NewMap.begin(), NewMap.end()); + AlternativeAliasBases.insert_range(NewMap); } /// Delete the set of alternative alias bases diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index b2e3b5d32fbe2..cf2cc65e0f042 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -76,8 +76,8 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, return nullptr; ValueMapT VTV; - VTV.insert(BBMap.begin(), BBMap.end()); - VTV.insert(GlobalMap.begin(), GlobalMap.end()); + VTV.insert_range(BBMap); + VTV.insert_range(GlobalMap); Scop &S = *Stmt.getParent(); const DataLayout &DL = S.getFunction().getDataLayout(); @@ -1131,7 +1131,7 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS, // Remember value in case it is visible after this subregion. if (isDominatingSubregionExit(DT, R, BB)) - ValueMap.insert(RegionMap.begin(), RegionMap.end()); + ValueMap.insert_range(RegionMap); } // Now create a new dedicated region exit block and add it to the region map. 
@@ -1164,7 +1164,7 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS, Instruction *BICopy = BBCopyEnd->getTerminator(); ValueMapT &RegionMap = RegionMaps[BBCopyStart]; - RegionMap.insert(StartBlockMap.begin(), StartBlockMap.end()); + RegionMap.insert_range(StartBlockMap); Builder.SetInsertPoint(BICopy); copyInstScalar(Stmt, TI, RegionMap, LTS); diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index ca497927e2976..e818dab4f9c0c 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -895,7 +895,7 @@ void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { Id = isl_ast_expr_get_id(StmtExpr); isl_ast_expr_free(StmtExpr); - LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); + LTS.insert_range(OutsideLoopIterations); Stmt = (ScopStmt *)isl_id_get_user(Id); auto *NewAccesses = createNewAccesses(Stmt, User); diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index d643a5e969132..072147b7b6150 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -5582,6 +5582,12 @@ binary_alias( binary = ":llvm-readobj", ) +# Workaround inability to put `.def` files into `srcs`. +cc_library( + name = "llvm-reduce-defs-lib", + textual_hdrs = glob(["tools/llvm-reduce/*.def"]), +) + cc_binary( name = "llvm-reduce", srcs = glob([ @@ -5609,6 +5615,7 @@ cc_binary( ":TargetParser", ":TransformUtils", ":config", + ":llvm-reduce-defs-lib", ], )