diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index 41b3075288d2d..65cffccff776f 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -103,7 +103,8 @@ jobs: uses: llvm/actions/install-ninja@main - name: Install abi-compliance-checker run: | - sudo apt-get install abi-dumper autoconf pkg-config + sudo apt-get update + sudo apt-get install -y abi-dumper autoconf pkg-config - name: Install universal-ctags run: | git clone https://github.com/universal-ctags/ctags.git @@ -154,7 +155,9 @@ jobs: path: build-latest - name: Install abi-compliance-checker - run: sudo apt-get install abi-compliance-checker + run: | + sudo apt-get update + sudo apt-get install -y abi-compliance-checker - name: Compare ABI run: | for lib in ${{ needs.abi-dump-setup.outputs.ABI_LIBS }}; do diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 9b3d49d4e99b9..e2ca2ff44890e 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -91,7 +91,8 @@ jobs: uses: llvm/actions/install-ninja@main - name: Install abi-compliance-checker run: | - sudo apt-get install abi-dumper autoconf pkg-config + sudo apt-get update + sudo apt-get -y install abi-dumper autoconf pkg-config - name: Install universal-ctags run: | git clone https://github.com/universal-ctags/ctags.git @@ -163,7 +164,9 @@ jobs: path: symbol-list - name: Install abi-compliance-checker - run: sudo apt-get install abi-compliance-checker + run: | + sudo apt-get update + sudo apt-get -y install abi-compliance-checker - name: Compare ABI run: | if [ -s symbol-list/llvm.symbols ]; then diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp index 4c75b42270114..71e852545203e 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp +++ 
b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp @@ -311,18 +311,7 @@ ClangTidyDiagnosticConsumer::ClangTidyDiagnosticConsumer( : Context(Ctx), ExternalDiagEngine(ExternalDiagEngine), RemoveIncompatibleErrors(RemoveIncompatibleErrors), GetFixesFromNotes(GetFixesFromNotes), - EnableNolintBlocks(EnableNolintBlocks) { - - if (Context.getOptions().HeaderFilterRegex && - !Context.getOptions().HeaderFilterRegex->empty()) - HeaderFilter = - std::make_unique(*Context.getOptions().HeaderFilterRegex); - - if (Context.getOptions().ExcludeHeaderFilterRegex && - !Context.getOptions().ExcludeHeaderFilterRegex->empty()) - ExcludeHeaderFilter = std::make_unique( - *Context.getOptions().ExcludeHeaderFilterRegex); -} + EnableNolintBlocks(EnableNolintBlocks) {} void ClangTidyDiagnosticConsumer::finalizeLastError() { if (!Errors.empty()) { @@ -571,17 +560,30 @@ void ClangTidyDiagnosticConsumer::checkFilters(SourceLocation Location, } StringRef FileName(File->getName()); - LastErrorRelatesToUserCode = - LastErrorRelatesToUserCode || Sources.isInMainFile(Location) || - (HeaderFilter && - (HeaderFilter->match(FileName) && - !(ExcludeHeaderFilter && ExcludeHeaderFilter->match(FileName)))); + LastErrorRelatesToUserCode = LastErrorRelatesToUserCode || + Sources.isInMainFile(Location) || + (getHeaderFilter()->match(FileName) && + !getExcludeHeaderFilter()->match(FileName)); unsigned LineNumber = Sources.getExpansionLineNumber(Location); LastErrorPassesLineFilter = LastErrorPassesLineFilter || passesLineFilter(FileName, LineNumber); } +llvm::Regex *ClangTidyDiagnosticConsumer::getHeaderFilter() { + if (!HeaderFilter) + HeaderFilter = + std::make_unique(*Context.getOptions().HeaderFilterRegex); + return HeaderFilter.get(); +} + +llvm::Regex *ClangTidyDiagnosticConsumer::getExcludeHeaderFilter() { + if (!ExcludeHeaderFilter) + ExcludeHeaderFilter = std::make_unique( + *Context.getOptions().ExcludeHeaderFilterRegex); + return ExcludeHeaderFilter.get(); +} + void 
ClangTidyDiagnosticConsumer::removeIncompatibleErrors() { // Each error is modelled as the set of intervals in which it applies // replacements. To detect overlapping replacements, we use a sweep line diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h index ff42f96a0477b..d6cf6a2b2731e 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h @@ -302,6 +302,10 @@ class ClangTidyDiagnosticConsumer : public DiagnosticConsumer { /// context. llvm::Regex *getHeaderFilter(); + /// Returns the \c ExcludeHeaderFilter constructed for the options set in the + /// context. + llvm::Regex *getExcludeHeaderFilter(); + /// Updates \c LastErrorRelatesToUserCode and LastErrorPassesLineFilter /// according to the diagnostic \p Location. void checkFilters(SourceLocation Location, const SourceManager &Sources); diff --git a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp index 8bac6f161fa05..dd1d86882f5d4 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyOptions.cpp @@ -194,8 +194,8 @@ ClangTidyOptions ClangTidyOptions::getDefaults() { Options.WarningsAsErrors = ""; Options.HeaderFileExtensions = {"", "h", "hh", "hpp", "hxx"}; Options.ImplementationFileExtensions = {"c", "cc", "cpp", "cxx"}; - Options.HeaderFilterRegex = std::nullopt; - Options.ExcludeHeaderFilterRegex = std::nullopt; + Options.HeaderFilterRegex = ""; + Options.ExcludeHeaderFilterRegex = ""; Options.SystemHeaders = false; Options.FormatStyle = "none"; Options.User = std::nullopt; diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index f1b934f7139e9..8741147a4f8a3 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ 
b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -87,7 +87,7 @@ def find_compilation_database(path: str) -> str: def get_tidy_invocation( - f: str, + f: Optional[str], clang_tidy_binary: str, checks: str, tmpdir: Optional[str], @@ -147,7 +147,8 @@ def get_tidy_invocation( start.append(f"--warnings-as-errors={warnings_as_errors}") if allow_no_checks: start.append("--allow-no-checks") - start.append(f) + if f: + start.append(f) return start @@ -490,7 +491,7 @@ async def main() -> None: try: invocation = get_tidy_invocation( - "", + None, clang_tidy_binary, args.checks, None, diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp index 1b1bcf78c9855..a2b856ad30519 100644 --- a/clang-tools-extra/clangd/InlayHints.cpp +++ b/clang-tools-extra/clangd/InlayHints.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/identity.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" @@ -368,7 +369,11 @@ static FunctionProtoTypeLoc getPrototypeLoc(Expr *Fn) { } if (auto F = Target.getAs()) { - return F; + // In some edge cases the AST can contain a "trivial" FunctionProtoTypeLoc + // which has null parameters. Avoid these as they don't contain useful + // information. 
+ if (llvm::all_of(F.getParams(), llvm::identity())) + return F; } return {}; diff --git a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp index 77d78b8777fe3..8ed8401f9fce9 100644 --- a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp +++ b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp @@ -997,11 +997,16 @@ TEST(ParameterHints, FunctionPointer) { f3_t f3; using f4_t = void(__stdcall *)(int param); f4_t f4; + __attribute__((noreturn)) f4_t f5; void bar() { f1($f1[[42]]); f2($f2[[42]]); f3($f3[[42]]); f4($f4[[42]]); + // This one runs into an edge case in clang's type model + // and we can't extract the parameter name. But at least + // we shouldn't crash. + f5(42); } )cpp", ExpectedHint{"param: ", "f1"}, ExpectedHint{"param: ", "f2"}, diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 316ac1743ccb7..0b2e9c5fabc36 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -187,6 +187,12 @@ Improvements to clang-tidy :doc:`readability-redundant-access-specifiers `, CheckFirstDeclaration :doc:`readability-redundant-casting `, IgnoreTypeAliases +- Fixed bug in :program:`clang-tidy` by which `HeaderFilterRegex` did not take + effect when passed via the `.clang-tidy` file. + +- Fixed bug in :program:`run_clang_tidy.py` where the program would not + correctly display the checks enabled by the top-level `.clang-tidy` file. 
+ New checks ^^^^^^^^^^ diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/.clang-tidy b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/.clang-tidy new file mode 100644 index 0000000000000..f4210353f94de --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/.clang-tidy @@ -0,0 +1 @@ +HeaderFilterRegex: '.*' diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.cpp new file mode 100644 index 0000000000000..5828c2cafaf7d --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.cpp @@ -0,0 +1,3 @@ +// RUN: clang-tidy -checks=-*,google-explicit-constructor %s 2>&1 | FileCheck %s +#include "foo.h" +// CHECK: foo.h:1:12: warning: single-argument constructors must be marked explicit diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.h b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.h new file mode 100644 index 0000000000000..f61d4c2923b50 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/foo.h @@ -0,0 +1 @@ +struct X { X(int); }; diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/.clang-tidy b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/.clang-tidy new file mode 100644 index 0000000000000..96706c1428047 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/.clang-tidy @@ -0,0 +1,2 @@ +InheritParentConfig: true +HeaderFilterRegex: 'subfolder/.*' diff 
--git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.cpp new file mode 100644 index 0000000000000..229ba52e2695a --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.cpp @@ -0,0 +1,8 @@ +// shell is required for the "dirname" command +// REQUIRES: shell +// RUN: clang-tidy -checks=-*,google-explicit-constructor %s -- -I "$(dirname %S)" 2>&1 | FileCheck %s +#include "foo.h" +// CHECK-NOT: foo.h:1:12: warning: single-argument constructors must be marked explicit + +#include "bar.h" +// CHECK: bar.h:1:13: warning: single-argument constructors must be marked explicit diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.h b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.h new file mode 100644 index 0000000000000..ee12d00d334dd --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/inheritance/subfolder/bar.h @@ -0,0 +1 @@ +struct XX { XX(int); }; diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/.clang-tidy b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/.clang-tidy new file mode 100644 index 0000000000000..f4210353f94de --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/.clang-tidy @@ -0,0 +1 @@ +HeaderFilterRegex: '.*' diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.cpp new file mode 100644 index 0000000000000..5828c2cafaf7d --- /dev/null +++ 
b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.cpp @@ -0,0 +1,3 @@ +// RUN: clang-tidy -checks=-*,google-explicit-constructor %s 2>&1 | FileCheck %s +#include "foo.h" +// CHECK: foo.h:1:12: warning: single-argument constructors must be marked explicit diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.h b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.h new file mode 100644 index 0000000000000..f61d4c2923b50 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/infrastructure/header-filter-from-config-file/simple/foo.h @@ -0,0 +1 @@ +struct X { X(int); }; diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 03b68271b7864..262bf4e3d4f5b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -314,6 +314,9 @@ C++2c Feature Support - Implemented `P3176R1 The Oxford variadic comma `_ +- The error produced when doing arithmetic operations on enums of different types + can be disabled with ``-Wno-enum-enum-conversion``. (#GH92340) + C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ - Removed the restriction to literal types in constexpr functions in C++23 mode. @@ -545,6 +548,11 @@ New Compiler Flags - The ``-Warray-compare-cxx26`` warning has been added to warn about array comparison starting from C++26, this warning is enabled as an error by default. +- The ``-Wnontrivial-memcall`` warning has been added to warn about + passing non-trivially-copyable destination parameter to ``memcpy``, + ``memset`` and similar functions for which it is a documented undefined + behavior. It is implied by ``-Wnontrivial-memaccess`` + - clang-cl and clang-dxc now support ``-fdiagnostics-color=[auto|never|always]`` in addition to ``-f[no-]color-diagnostics``. @@ -576,11 +584,6 @@ Modified Compiler Flags to utilize these vector libraries. The behavior for all other vector function libraries remains unchanged. 
-- The ``-Wnontrivial-memcall`` warning has been added to warn about - passing non-trivially-copyable destination parameter to ``memcpy``, - ``memset`` and similar functions for which it is a documented undefined - behavior. It is implied by ``-Wnontrivial-memaccess`` - - Added ``-fmodules-reduced-bmi`` flag corresponding to ``-fexperimental-modules-reduced-bmi`` flag. The ``-fmodules-reduced-bmi`` flag is intended to be enabled by default in the future. @@ -694,6 +697,16 @@ Improvements to Clang's diagnostics match a template template parameter, in terms of the C++17 relaxed matching rules instead of the old ones. +- No longer diagnosing idiomatic function pointer casts on Windows under + ``-Wcast-function-type-mismatch`` (which is enabled by ``-Wextra``). Clang + would previously warn on this construct, but will no longer do so on Windows: + + .. code-block:: c + + typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO); + HMODULE Lib = LoadLibrary("kernel32"); + PGNSI FnPtr = (PGNSI)GetProcAddress(Lib, "GetNativeSystemInfo"); + - Don't emit duplicated dangling diagnostics. (#GH93386). - Improved diagnostic when trying to befriend a concept. (#GH45182). @@ -899,6 +912,8 @@ Bug Fixes in This Version being deleted has a potentially throwing destructor (#GH118660). - Clang now outputs correct values when #embed data contains bytes with negative signed char values (#GH102798). +- Fix crash due to unknown references and pointer implementation and handling of + base classes. (GH139452) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1058,8 +1073,13 @@ Bug Fixes to C++ Support - Fixed a substitution bug in transforming CTAD aliases when the type alias contains a non-pack template argument corresponding to a pack parameter (#GH124715) - Clang is now better at keeping track of friend function template instance contexts. (#GH55509) +- Fixes matching of nested template template parameters. 
(#GH130362) +- Correctly diagnoses template template paramters which have a pack parameter + not in the last position. - Fixed an integer overflow bug in computing template parameter depths when synthesizing CTAD guides. (#GH128691) - Fixed an incorrect pointer access when checking access-control on concepts. (#GH131530) +- Fixed various alias CTAD bugs involving variadic template arguments. (#GH123591), (#GH127539), (#GH129077), + (#GH129620), and (#GH129998). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1098,6 +1118,7 @@ Miscellaneous Clang Crashes Fixed - Fixed a crash when an unscoped enumeration declared by an opaque-enum-declaration within a class template with a dependent underlying type is subject to integral promotion. (#GH117960) +- Fix code completion crash involving PCH serialzied templates. (#GH139019) OpenACC Specific Changes ------------------------ @@ -1252,6 +1273,8 @@ RISC-V Support - The option ``-mcmodel=large`` for the large code model is supported. - Bump RVV intrinsic to version 1.0, the spec: https://github.com/riscv-non-isa/rvv-intrinsic-doc/releases/tag/v1.0.0-rc4 +- `Zicsr` / `Zifencei` are allowed to be duplicated in the presence of `g` in `-march`. + CUDA/HIP Language Changes ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fixed a bug about overriding a constexpr pure-virtual member function with a non-constexpr virtual member function which causes compilation failure when including standard C++ header `format`. @@ -1450,6 +1473,9 @@ Crash and bug fixes - The ``unix.BlockInCriticalSection`` now recognizes the ``lock()`` member function as expected, even if it's inherited from a base class. Fixes (#GH104241). +- Fixed a crash when C++20 parenthesized initializer lists are used. This issue + was causing a crash in clang-tidy. 
(#GH136041) + Improvements ^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 7be4022649329..06ac0f1704aa9 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -1752,7 +1752,14 @@ enum class StringLiteralKind { UTF8, UTF16, UTF32, - Unevaluated + Unevaluated, + // Binary kind of string literal is used for the data coming via #embed + // directive. File's binary contents is transformed to a special kind of + // string literal that in some cases may be used directly as an initializer + // and some features of classic string literals are not applicable to this + // kind of a string literal, for example finding a particular byte's source + // location for better diagnosing. + Binary }; /// StringLiteral - This represents a string literal expression, e.g. "foo" @@ -1884,6 +1891,8 @@ class StringLiteral final int64_t getCodeUnitS(size_t I, uint64_t BitWidth) const { int64_t V = getCodeUnit(I); if (isOrdinary() || isWide()) { + // Ordinary and wide string literals have types that can be signed. + // It is important for checking C23 constexpr initializers. unsigned Width = getCharByteWidth() * BitWidth; llvm::APInt AInt(Width, (uint64_t)V); V = AInt.getSExtValue(); @@ -4965,9 +4974,9 @@ class EmbedExpr final : public Expr { assert(EExpr && CurOffset != ULLONG_MAX && "trying to dereference an invalid iterator"); IntegerLiteral *N = EExpr->FakeChildNode; - StringRef DataRef = EExpr->Data->BinaryData->getBytes(); N->setValue(*EExpr->Ctx, - llvm::APInt(N->getValue().getBitWidth(), DataRef[CurOffset], + llvm::APInt(N->getValue().getBitWidth(), + EExpr->Data->BinaryData->getCodeUnit(CurOffset), N->getType()->isSignedIntegerType())); // We want to return a reference to the fake child node in the // EmbedExpr, not the local variable N. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ec2a140e04d5b..7180447e250ce 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7567,9 +7567,13 @@ def warn_arith_conv_mixed_enum_types_cxx20 : Warning< "%sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2 is deprecated">, InGroup; -def err_conv_mixed_enum_types_cxx26 : Error< + +def err_conv_mixed_enum_types: Error < "invalid %sub{select_arith_conv_kind}0 " "different enumeration types%diff{ ($ and $)|}1,2">; +def zzzz_warn_conv_mixed_enum_types_cxx26 : Warning < + err_conv_mixed_enum_types.Summary>, + InGroup, DefaultError; def warn_arith_conv_mixed_anon_enum_types : Warning< warn_arith_conv_mixed_enum_types.Summary>, diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index b4d485dac8a26..c544a8c002191 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -39,6 +39,8 @@ class Distro { DebianBullseye, DebianBookworm, DebianTrixie, + DebianForky, + DebianDuke, Exherbo, RHEL5, RHEL6, @@ -128,7 +130,7 @@ class Distro { bool IsOpenSUSE() const { return DistroVal == OpenSUSE; } bool IsDebian() const { - return DistroVal >= DebianLenny && DistroVal <= DebianTrixie; + return DistroVal >= DebianLenny && DistroVal <= DebianDuke; } bool IsUbuntu() const { diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index b1b63aedf86ab..78dff1165dcf5 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/Support/Error.h" #include @@ -41,6 +42,7 @@ class 
CXXRecordDecl; class Decl; class IncrementalExecutor; class IncrementalParser; +class IncrementalCUDADeviceParser; /// Create a pre-configured \c CompilerInstance for incremental processing. class IncrementalCompilerBuilder { @@ -93,7 +95,10 @@ class Interpreter { std::unique_ptr IncrExecutor; // An optional parser for CUDA offloading - std::unique_ptr DeviceParser; + std::unique_ptr DeviceParser; + + // An optional action for CUDA offloading + std::unique_ptr DeviceAct; /// List containing information about each incrementally parsed piece of code. std::list PTUs; @@ -112,6 +117,9 @@ class Interpreter { /// Compiler instance performing the incremental compilation. std::unique_ptr CI; + /// An optional compiler instance for CUDA offloading + std::unique_ptr DeviceCI; + protected: // Derived classes can use an extended interface of the Interpreter. Interpreter(std::unique_ptr Instance, llvm::Error &Err, @@ -129,10 +137,14 @@ class Interpreter { public: virtual ~Interpreter(); static llvm::Expected> - create(std::unique_ptr CI); + create(std::unique_ptr CI, + std::unique_ptr JITBuilder = nullptr); static llvm::Expected> createWithCUDA(std::unique_ptr CI, std::unique_ptr DCI); + static llvm::Expected> + createLLJITBuilder(std::unique_ptr EPC, + llvm::StringRef OrcRuntimePath); const ASTContext &getASTContext() const; ASTContext &getASTContext(); const CompilerInstance *getCompilerInstance() const; @@ -175,10 +187,11 @@ class Interpreter { llvm::Expected ExtractValueFromExpr(Expr *E); llvm::Expected CompileDtorCall(CXXRecordDecl *CXXRD); - CodeGenerator *getCodeGen() const; - std::unique_ptr GenModule(); + CodeGenerator *getCodeGen(IncrementalAction *Action = nullptr) const; + std::unique_ptr GenModule(IncrementalAction *Action = nullptr); PartialTranslationUnit &RegisterPTU(TranslationUnitDecl *TU, - std::unique_ptr M = {}); + std::unique_ptr M = {}, + IncrementalAction *Action = nullptr); // A cache for the compiled destructors used to for de-allocation of managed // 
clang::Values. diff --git a/clang/include/clang/Interpreter/RemoteJITUtils.h b/clang/include/clang/Interpreter/RemoteJITUtils.h new file mode 100644 index 0000000000000..85fb66ddfa7c7 --- /dev/null +++ b/clang/include/clang/Interpreter/RemoteJITUtils.h @@ -0,0 +1,43 @@ +//===-- RemoteJITUtils.h - Utilities for remote-JITing ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for ExecutorProcessControl-based remote JITing with Orc and +// JITLink. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_INTERPRETER_REMOTEJITUTILS_H +#define LLVM_CLANG_INTERPRETER_REMOTEJITUTILS_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/Layer.h" +#include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h" +#include "llvm/Support/Error.h" + +#include +#include +#include + +llvm::Expected> +launchExecutor(llvm::StringRef ExecutablePath, bool UseSharedMemory, + llvm::StringRef SlabAllocateSizeString, int stdin_fd, int stdout_fd, int stderr_fd); + +/// Create a JITLinkExecutor that connects to the given network address +/// through a TCP socket. A valid NetworkAddress provides hostname and port, +/// e.g. localhost:20000. +llvm::Expected> +connectTCPSocket(llvm::StringRef NetworkAddress, bool UseSharedMemory, + llvm::StringRef SlabAllocateSizeString); + + +/// Get the PID of the last launched executor. +/// This is useful for debugging or for cleanup purposes. 
+pid_t getLastLaunchedExecutorPID(); + +#endif // LLVM_CLANG_INTERPRETER_REMOTEJITUTILS_H diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index cecf5cff332f4..d8cc0171c22c6 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11279,14 +11279,16 @@ class Sema final : public SemaBase { /// The context in which we are checking a template parameter list. enum TemplateParamListContext { - TPC_ClassTemplate, - TPC_VarTemplate, + // For this context, Class, Variable, TypeAlias, and non-pack Template + // Template Parameters are treated uniformly. + TPC_Other, + TPC_FunctionTemplate, TPC_ClassTemplateMember, TPC_FriendClassTemplate, TPC_FriendFunctionTemplate, TPC_FriendFunctionTemplateDefinition, - TPC_TypeAliasTemplate + TPC_TemplateTemplateParameterPack, }; /// Checks the validity of a template parameter list, possibly diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 34bb7a809162b..dbb8e832db5ff 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -385,6 +385,19 @@ ANALYZER_OPTION( "flex\" won't be analyzed.", true) +ANALYZER_OPTION( + bool, InlineFunctionsWithAmbiguousLoops, "inline-functions-with-ambiguous-loops", + "If disabled (the default), the analyzer puts functions on a \"do not " + "inline this\" list if it finds an execution path within that function " + "that may potentially perform 'analyzer-max-loop' (= 4 by default) " + "iterations in a loop. (Note that functions that _definitely_ reach the " + "loop limit on some execution path are currently marked as \"do not " + "inline\" even if this option is enabled.) 
Enabling this option " + "eliminates this (somewhat arbitrary) restriction from the analysis " + "scope, which increases the analysis runtime (on average by ~10%, but " + "a few translation units may see much larger slowdowns).", + false) + //===----------------------------------------------------------------------===// // Unsigned analyzer options. //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h index 3ee0d229cfc29..761395260a0cf 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/FunctionSummary.h @@ -81,10 +81,6 @@ class FunctionSummariesTy { I->second.MayInline = 0; } - void markReachedMaxBlockCount(const Decl *D) { - markShouldNotInline(D); - } - std::optional mayInline(const Decl *D) { MapTy::const_iterator I = Map.find(D); if (I != Map.end() && I->second.InlineChecked) diff --git a/clang/include/clang/Support/Compiler.h b/clang/include/clang/Support/Compiler.h index 13582b899dc2a..5a74f8e3b6723 100644 --- a/clang/include/clang/Support/Compiler.h +++ b/clang/include/clang/Support/Compiler.h @@ -54,7 +54,7 @@ #define CLANG_ABI LLVM_ATTRIBUTE_VISIBILITY_DEFAULT #define CLANG_TEMPLATE_ABI LLVM_ATTRIBUTE_VISIBILITY_DEFAULT #define CLANG_EXPORT_TEMPLATE -#elif defined(__MACH__) || defined(__WASM__) +#elif defined(__MACH__) || defined(__WASM__) || defined(__EMSCRIPTEN__) #define CLANG_ABI LLVM_ATTRIBUTE_VISIBILITY_DEFAULT #define CLANG_TEMPLATE_ABI #define CLANG_EXPORT_TEMPLATE diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index aa7e14329a21b..a5b7ef8c4271b 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1104,6 +1104,7 @@ unsigned StringLiteral::mapCharByteWidth(TargetInfo const &Target, switch (SK) { case StringLiteralKind::Ordinary: case 
StringLiteralKind::UTF8: + case StringLiteralKind::Binary: CharByteWidth = Target.getCharWidth(); break; case StringLiteralKind::Wide: @@ -1216,6 +1217,7 @@ void StringLiteral::outputString(raw_ostream &OS) const { switch (getKind()) { case StringLiteralKind::Unevaluated: case StringLiteralKind::Ordinary: + case StringLiteralKind::Binary: break; // no prefix. case StringLiteralKind::Wide: OS << 'L'; @@ -1332,6 +1334,11 @@ StringLiteral::getLocationOfByte(unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, const TargetInfo &Target, unsigned *StartToken, unsigned *StartTokenByteOffset) const { + // No source location of bytes for binary literals since they don't come from + // source. + if (getKind() == StringLiteralKind::Binary) + return getStrTokenLoc(0); + assert((getKind() == StringLiteralKind::Ordinary || getKind() == StringLiteralKind::UTF8 || getKind() == StringLiteralKind::Unevaluated) && @@ -1658,8 +1665,11 @@ SourceLocation CallExpr::getBeginLoc() const { Method && Method->isExplicitObjectMemberFunction()) { bool HasFirstArg = getNumArgs() > 0 && getArg(0); assert(HasFirstArg); - if (HasFirstArg) - return getArg(0)->getBeginLoc(); + if (HasFirstArg) { + if (auto FirstArgLoc = getArg(0)->getBeginLoc(); FirstArgLoc.isValid()) { + return FirstArgLoc; + } + } } } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5aae78dd2fee7..e0746f4532245 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3311,7 +3311,11 @@ static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, return false; // Extract most-derived object and corresponding type. - DerivedDecl = D.MostDerivedType->getAsCXXRecordDecl(); + // FIXME: After implementing P2280R4 it became possible to get references + // here. 
We do MostDerivedType->getAsCXXRecordDecl() in several other + // locations and if we see crashes in those locations in the future + // it may make more sense to move this fix into Lvalue::set. + DerivedDecl = D.MostDerivedType.getNonReferenceType()->getAsCXXRecordDecl(); if (!CastToDerivedClass(Info, E, Obj, DerivedDecl, D.MostDerivedPathLength)) return false; @@ -12710,11 +12714,13 @@ static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc, bool DetermineForCompleteObject = refersToCompleteObject(LVal); auto CheckedHandleSizeof = [&](QualType Ty, CharUnits &Result) { - if (Ty.isNull() || Ty->isIncompleteType() || Ty->isFunctionType()) + if (Ty.isNull()) return false; - if (Ty->isReferenceType()) - Ty = Ty.getNonReferenceType(); + Ty = Ty.getNonReferenceType(); + + if (Ty->isIncompleteType() || Ty->isFunctionType()) + return false; return HandleSizeof(Info, ExprLoc, Ty, Result); }; diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 42b735ccf4a2c..cb35dbd611204 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3552,7 +3552,21 @@ void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T, Qualifiers quals, SourceRange Range) { - Error(Range.getBegin(), "matrix type") << Range; + QualType EltTy = T->getElementType(); + + llvm::SmallString<64> TemplateMangling; + llvm::raw_svector_ostream Stream(TemplateMangling); + MicrosoftCXXNameMangler Extra(Context, Stream); + + Stream << "?$"; + + Extra.mangleSourceName("__matrix"); + Extra.mangleType(EltTy, Range, QMM_Escape); + + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumRows())); + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumColumns())); + + mangleArtificialTagType(TagTypeKind::Struct, TemplateMangling, {"__clang"}); } void MicrosoftCXXNameMangler::mangleType(const DependentSizedMatrixType *T, diff --git 
a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp index 76c77569da9fd..c043996f1ada3 100644 --- a/clang/lib/AST/NestedNameSpecifier.cpp +++ b/clang/lib/AST/NestedNameSpecifier.cpp @@ -283,13 +283,16 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, case TypeSpec: { const auto *Record = dyn_cast_or_null(getAsRecordDecl()); - if (ResolveTemplateArguments && Record) { + const TemplateParameterList *TPL = nullptr; + if (Record) { + TPL = Record->getSpecializedTemplate()->getTemplateParameters(); + if (ResolveTemplateArguments) { // Print the type trait with resolved template parameters. Record->printName(OS, Policy); - printTemplateArgumentList( - OS, Record->getTemplateArgs().asArray(), Policy, - Record->getSpecializedTemplate()->getTemplateParameters()); + printTemplateArgumentList(OS, Record->getTemplateArgs().asArray(), + Policy, TPL); break; + } } const Type *T = getAsType(); @@ -313,8 +316,8 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, TemplateName::Qualified::None); // Print the template argument list. - printTemplateArgumentList(OS, SpecType->template_arguments(), - InnerPolicy); + printTemplateArgumentList(OS, SpecType->template_arguments(), InnerPolicy, + TPL); } else if (const auto *DepSpecType = dyn_cast(T)) { // Print the template name without its corresponding @@ -322,7 +325,7 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, OS << DepSpecType->getIdentifier()->getName(); // Print the template argument list. 
printTemplateArgumentList(OS, DepSpecType->template_arguments(), - InnerPolicy); + InnerPolicy, TPL); } else { // Print the type normally QualType(T, 0).print(OS, InnerPolicy); diff --git a/clang/lib/Analysis/LiveVariables.cpp b/clang/lib/Analysis/LiveVariables.cpp index 481932ee59c8e..5fb5ee767a683 100644 --- a/clang/lib/Analysis/LiveVariables.cpp +++ b/clang/lib/Analysis/LiveVariables.cpp @@ -662,12 +662,19 @@ void LiveVariables::dumpExprLiveness(const SourceManager &M) { } void LiveVariablesImpl::dumpExprLiveness(const SourceManager &M) { + const ASTContext &Ctx = analysisContext.getASTContext(); + auto ByIDs = [&Ctx](const Expr *L, const Expr *R) { + return L->getID(Ctx) < R->getID(Ctx); + }; + // Don't iterate over blockEndsToLiveness directly because it's not sorted. for (const CFGBlock *B : *analysisContext.getCFG()) { - llvm::errs() << "\n[ B" << B->getBlockID() << " (live expressions at block exit) ]\n"; - for (const Expr *E : blocksEndToLiveness[B].liveExprs) { + std::vector LiveExprs; + llvm::append_range(LiveExprs, blocksEndToLiveness[B].liveExprs); + llvm::sort(LiveExprs, ByIDs); + for (const Expr *E : LiveExprs) { llvm::errs() << "\n"; E->dump(); } diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 991efd2bde01f..4cf4230273d38 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -618,14 +618,7 @@ class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo { DefineStd(Builder, "unix", Opts); Builder.defineMacro("__svr4__"); Builder.defineMacro("__SVR4"); - // Solaris headers require _XOPEN_SOURCE to be set to 600 for C99 and - // newer, but to 500 for everything else. feature_test.h has a check to - // ensure that you are not using C99 with an old version of X/Open or C89 - // with a new version. 
- if (Opts.C99) - Builder.defineMacro("_XOPEN_SOURCE", "600"); - else - Builder.defineMacro("_XOPEN_SOURCE", "500"); + Builder.defineMacro("_XOPEN_SOURCE", "600"); if (Opts.CPlusPlus) { Builder.defineMacro("__C99FEATURES__"); Builder.defineMacro("_FILE_OFFSET_BITS", "64"); diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp index c836d110d26d5..6326188b3bd18 100644 --- a/clang/lib/Basic/Targets/SystemZ.cpp +++ b/clang/lib/Basic/Targets/SystemZ.cpp @@ -105,7 +105,7 @@ static constexpr ISANameRevision ISARevisions[] = { {{"arch12"}, 12}, {{"z14"}, 12}, {{"arch13"}, 13}, {{"z15"}, 13}, {{"arch14"}, 14}, {{"z16"}, 14}, - {{"arch15"}, 15}, + {{"arch15"}, 15}, {{"z17"}, 15}, }; int SystemZTargetInfo::getISARevision(StringRef Name) const { diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 3cc79535de8da..71ba71fa18379 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -160,6 +160,10 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { return Distro::DebianBookworm; case 13: return Distro::DebianTrixie; + case 14: + return Distro::DebianForky; + case 15: + return Distro::DebianDuke; default: return Distro::UnknownDistro; } @@ -173,6 +177,8 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { .Case("bullseye/sid", Distro::DebianBullseye) .Case("bookworm/sid", Distro::DebianBookworm) .Case("trixie/sid", Distro::DebianTrixie) + .Case("forky/sid", Distro::DebianForky) + .Case("duke/sid", Distro::DebianDuke) .Default(Distro::UnknownDistro); } diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 3aee540d501be..ef2d0c93b5b0b 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -658,21 +658,13 @@ llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D, CPUArgFPUKind != llvm::ARM::FK_INVALID ? 
CPUArgFPUKind : ArchArgFPUKind; (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } else { - bool Generic = true; if (!ForAS) { std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); - if (CPU != "generic") - Generic = false; llvm::ARM::ArchKind ArchKind = arm::getLLVMArchKindForARM(CPU, ArchName, Triple); FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind); (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } - if (Generic && (Triple.isOSWindows() || Triple.isOSDarwin()) && - getARMSubArchVersionNumber(Triple) >= 7) { - FPUKind = llvm::ARM::parseFPU("neon"); - (void)llvm::ARM::getFPUFeatures(FPUKind, Features); - } } // Now we've finished accumulating features from arch, cpu and fpu, diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 76cedf312d68a..6ea701a7882d1 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -313,6 +313,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, // handled somewhere else. Args.ClaimAllArgs(options::OPT_static_libgcc); + CmdArgs.push_back("--eh-frame-hdr"); //---------------------------------------------------------------------------- // //---------------------------------------------------------------------------- @@ -802,9 +803,7 @@ bool HexagonToolChain::isAutoHVXEnabled(const llvm::opt::ArgList &Args) { // Returns the default CPU for Hexagon. This is the default compilation target // if no Hexagon processor is selected at the command-line. 
// -StringRef HexagonToolChain::GetDefaultCPU() { - return "hexagonv60"; -} +StringRef HexagonToolChain::GetDefaultCPU() { return "hexagonv68"; } StringRef HexagonToolChain::GetTargetCPUVersion(const ArgList &Args) { Arg *CpuArg = nullptr; diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index c311deaa17bb0..d953348b0258d 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -349,6 +349,13 @@ bool ContinuationIndenter::canBreak(const LineState &State) { } } + // Allow breaking before the right parens with block indentation if there was + // a break after the left parens, which is tracked by BreakBeforeClosingParen. + if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && + Current.is(tok::r_paren)) { + return CurrentState.BreakBeforeClosingParen; + } + // Don't allow breaking before a closing brace of a block-indented braced list // initializer if there isn't already a break. 
if (Current.is(tok::r_brace) && Current.MatchingParen && @@ -1445,7 +1452,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { (PreviousNonComment->ClosesTemplateDeclaration || PreviousNonComment->ClosesRequiresClause || (PreviousNonComment->is(TT_AttributeMacro) && - Current.isNot(tok::l_paren)) || + Current.isNot(tok::l_paren) && + !Current.endsSequence(TT_StartOfName, TT_AttributeMacro, + TT_PointerOrReference)) || PreviousNonComment->isOneOf( TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 0bb8545884442..aba7db6dd50a8 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -2114,10 +2114,14 @@ std::error_code parseConfiguration(llvm::MemoryBufferRef Config, FormatStyle::FormatStyleSet StyleSet; bool LanguageFound = false; for (const FormatStyle &Style : llvm::reverse(Styles)) { - if (Style.Language != FormatStyle::LK_None) + const auto Lang = Style.Language; + if (Lang != FormatStyle::LK_None) StyleSet.Add(Style); - if (Style.Language == Language) + if (Lang == Language || + // For backward compatibility. + (Lang == FormatStyle::LK_Cpp && Language == FormatStyle::LK_C)) { LanguageFound = true; + } } if (!LanguageFound) { if (Styles.empty() || Styles[0].Language != FormatStyle::LK_None) @@ -2157,8 +2161,14 @@ FormatStyle::FormatStyleSet::Get(FormatStyle::LanguageKind Language) const { if (!Styles) return std::nullopt; auto It = Styles->find(Language); - if (It == Styles->end()) - return std::nullopt; + if (It == Styles->end()) { + if (Language != FormatStyle::LK_C) + return std::nullopt; + // For backward compatibility. 
+ It = Styles->find(FormatStyle::LK_Cpp); + if (It == Styles->end()) + return std::nullopt; + } FormatStyle Style = It->second; Style.StyleSet = *this; return Style; @@ -3733,8 +3743,10 @@ reformat(const FormatStyle &Style, StringRef Code, tooling::Replacements Replaces = Formatter(*Env, Style, Status).process().first; // add a replacement to remove the "x = " from the result. - Replaces = Replaces.merge( - tooling::Replacements(tooling::Replacement(FileName, 0, 4, ""))); + if (Code.starts_with("x = ")) { + Replaces = Replaces.merge( + tooling::Replacements(tooling::Replacement(FileName, 0, 4, ""))); + } // apply the reformatting changes and the removal of "x = ". if (applyAllReplacements(Code, Replaces)) return {Replaces, 0}; @@ -3936,34 +3948,42 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOptions LangOpts; - FormatStyle::LanguageStandard LexingStd = Style.Standard; - if (LexingStd == FormatStyle::LS_Auto) - LexingStd = FormatStyle::LS_Latest; - if (LexingStd == FormatStyle::LS_Latest) + auto LexingStd = Style.Standard; + if (LexingStd == FormatStyle::LS_Auto || LexingStd == FormatStyle::LS_Latest) LexingStd = FormatStyle::LS_Cpp20; - LangOpts.CPlusPlus = 1; - LangOpts.CPlusPlus11 = LexingStd >= FormatStyle::LS_Cpp11; - LangOpts.CPlusPlus14 = LexingStd >= FormatStyle::LS_Cpp14; - LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; - LangOpts.CPlusPlus20 = LexingStd >= FormatStyle::LS_Cpp20; - LangOpts.Char8 = LexingStd >= FormatStyle::LS_Cpp20; + + const bool SinceCpp11 = LexingStd >= FormatStyle::LS_Cpp11; + const bool SinceCpp20 = LexingStd >= FormatStyle::LS_Cpp20; + + switch (Style.Language) { + case FormatStyle::LK_C: + LangOpts.C11 = 1; + break; + case FormatStyle::LK_Cpp: + case FormatStyle::LK_ObjC: + LangOpts.CXXOperatorNames = 1; + LangOpts.CPlusPlus11 = SinceCpp11; + LangOpts.CPlusPlus14 = LexingStd >= FormatStyle::LS_Cpp14; + LangOpts.CPlusPlus17 = 
LexingStd >= FormatStyle::LS_Cpp17; + LangOpts.CPlusPlus20 = SinceCpp20; + [[fallthrough]]; + default: + LangOpts.CPlusPlus = 1; + } + + LangOpts.Char8 = SinceCpp20; // Turning on digraphs in standards before C++0x is error-prone, because e.g. // the sequence "<::" will be unconditionally treated as "[:". // Cf. Lexer::LexTokenInternal. - LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11; + LangOpts.Digraphs = SinceCpp11; LangOpts.LineComment = 1; - - const auto Language = Style.Language; - LangOpts.C17 = Language == FormatStyle::LK_C; - LangOpts.CXXOperatorNames = - Language == FormatStyle::LK_Cpp || Language == FormatStyle::LK_ObjC; - LangOpts.Bool = 1; LangOpts.ObjC = 1; LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. LangOpts.DeclSpecKeyword = 1; // To get __declspec. LangOpts.C99 = 1; // To get kw_restrict for non-underscore-prefixed restrict. + return LangOpts; } diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 60e428123d26d..a4e2acc922c0d 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -44,7 +44,7 @@ static SmallVector CppNonKeywordTypes = { bool FormatToken::isTypeName(const LangOptions &LangOpts) const { if (is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts)) return true; - return (LangOpts.CXXOperatorNames || LangOpts.C17) && is(tok::identifier) && + return (LangOpts.CXXOperatorNames || LangOpts.C11) && is(tok::identifier) && std::binary_search(CppNonKeywordTypes.begin(), CppNonKeywordTypes.end(), TokenText); } diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 16f0a76f3a954..0755a5d355394 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -636,6 +636,36 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { return true; } +void FormatTokenLexer::tryParseJavaTextBlock() { + if (FormatTok->TokenText != "\"\"") + return; + + const auto *S = 
Lex->getBufferLocation(); + const auto *End = Lex->getBuffer().end(); + + if (S == End || *S != '\"') + return; + + ++S; // Skip the `"""` that begins a text block. + + // Find the `"""` that ends the text block. + for (int Count = 0; Count < 3 && S < End; ++S) { + switch (*S) { + case '\\': + Count = -1; + break; + case '\"': + ++Count; + break; + default: + Count = 0; + } + } + + // Ignore the possibly invalid text block. + resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S))); +} + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. Changes the current token to a regex literal and updates @@ -1326,6 +1356,9 @@ FormatToken *FormatTokenLexer::getNextToken() { FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); ++Column; StateStack.push(LexerState::TOKEN_STASHED); + } else if (Style.Language == FormatStyle::LK_Java && + FormatTok->is(tok::string_literal)) { + tryParseJavaTextBlock(); } if (Style.isVerilog() && Tokens.size() > 0 && diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 61474a3f9ada8..d9a25c8ef3538 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -71,6 +71,8 @@ class FormatTokenLexer { bool canPrecedeRegexLiteral(FormatToken *Prev); + void tryParseJavaTextBlock(); + // Tries to parse a JavaScript Regex literal starting at the current token, // if that begins with a slash and is in a location where JavaScript allows // regex literals. 
Changes the current token to a regex literal and updates diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 976c4d888e1fd..0c13356ca96de 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -129,7 +129,6 @@ class AnnotatingParser { : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) { - assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17)); Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); resetTokenMetadata(); } @@ -3820,7 +3819,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, }; const auto *Next = Current.Next; - const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C17; + const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C11; // Find parentheses of parameter list. if (Current.is(tok::kw_operator)) { @@ -3840,6 +3839,8 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, } else { if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0) return false; + while (Next && Next->startsSequence(tok::hashhash, tok::identifier)) + Next = Next->Next->Next; for (; Next; Next = Next->Next) { if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { Next = Next->MatchingParen; @@ -3962,8 +3963,10 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { FormatToken *AfterLastAttribute = nullptr; FormatToken *ClosingParen = nullptr; - for (auto *Tok = FirstNonComment ? FirstNonComment->Next : nullptr; Tok; - Tok = Tok->Next) { + for (auto *Tok = FirstNonComment && FirstNonComment->isNot(tok::kw_using) + ? 
FirstNonComment->Next + : nullptr; + Tok; Tok = Tok->Next) { if (Tok->is(TT_StartOfName)) SeenName = true; if (Tok->Previous->EndsCppAttributeGroup) @@ -5437,7 +5440,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // handled. if (Left.is(tok::amp) && Right.is(tok::r_square)) return Style.SpacesInSquareBrackets; - return Style.SpaceAfterLogicalNot && Left.is(tok::exclaim); + if (Left.isNot(tok::exclaim)) + return false; + if (Left.TokenText == "!") + return Style.SpaceAfterLogicalNot; + assert(Left.TokenText == "not"); + return Right.isOneOf(tok::coloncolon, TT_UnaryOperator); } // If the next token is a binary operator or a selector name, we have diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h index c0c13941ef4f7..e4b94431e68b4 100644 --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -224,9 +224,7 @@ class TokenAnnotator { public: TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) : Style(Style), IsCpp(Style.isCpp()), - LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) { - assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17)); - } + LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {} /// Adapts the indent levels of comment lines to the indent of the /// subsequent line. diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 9a03e9409fcbc..673b3e6c4b8c2 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -167,9 +167,7 @@ UnwrappedLineParser::UnwrappedLineParser( ? 
IG_Rejected : IG_Inited), IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), - Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) { - assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17)); -} + Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -1839,8 +1837,8 @@ void UnwrappedLineParser::parseStructuralElement( nextToken(); if (FormatTok->is(tok::l_paren)) { parseParens(); - assert(FormatTok->Previous); - if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, + if (FormatTok->Previous && + FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, tok::l_paren)) { Line->SeenDecltypeAuto = true; } @@ -2583,7 +2581,8 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { if (Prev) { auto OptionalParens = [&] { if (MightBeStmtExpr || MightBeFoldExpr || Line->InMacroBody || - SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave) { + SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave || + RParen->getPreviousNonComment() == LParen) { return false; } const bool DoubleParens = diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h index a04e8b6de44d0..8b230af6f6647 100644 --- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -515,32 +515,32 @@ __device__ inline cuuint32_t __nvvm_get_smem_pointer(void *__ptr) { #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 800 __device__ inline unsigned __reduce_add_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_add(__mask, __value); + return __nvvm_redux_sync_add(__value, __mask); } __device__ inline unsigned __reduce_min_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_umin(__mask, __value); + return __nvvm_redux_sync_umin(__value, __mask); } __device__ inline unsigned __reduce_max_sync(unsigned __mask, unsigned __value) { - return 
__nvvm_redux_sync_umax(__mask, __value); + return __nvvm_redux_sync_umax(__value, __mask); } __device__ inline int __reduce_min_sync(unsigned __mask, int __value) { - return __nvvm_redux_sync_min(__mask, __value); + return __nvvm_redux_sync_min(__value, __mask); } __device__ inline int __reduce_max_sync(unsigned __mask, int __value) { - return __nvvm_redux_sync_max(__mask, __value); + return __nvvm_redux_sync_max(__value, __mask); } __device__ inline unsigned __reduce_or_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_or(__mask, __value); + return __nvvm_redux_sync_or(__value, __mask); } __device__ inline unsigned __reduce_and_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_and(__mask, __value); + return __nvvm_redux_sync_and(__value, __mask); } __device__ inline unsigned __reduce_xor_sync(unsigned __mask, unsigned __value) { - return __nvvm_redux_sync_xor(__mask, __value); + return __nvvm_redux_sync_xor(__value, __mask); } __device__ inline void __nv_memcpy_async_shared_global_4(void *__dst, diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h index 516ccc68672d6..ee8cbf28ca41c 100644 --- a/clang/lib/Headers/avx10_2_512convertintrin.h +++ b/clang/lib/Headers/avx10_2_512convertintrin.h @@ -78,20 +78,20 @@ _mm512_maskz_cvtbiasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiassph_bf8(__m512i __A, __m512h __B) { +_mm512_cvts_biasph_bf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_bf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_bf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, 
(__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiassph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvts_biasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); @@ -118,20 +118,20 @@ _mm512_maskz_cvtbiasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiassph_hf8(__m512i __A, __m512h __B) { +_mm512_cvts_biasph_hf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_hf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvts_biasph_hf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiassph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvts_biasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); @@ -157,21 +157,21 @@ _mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) { +_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A), (__v32hf)(__B)); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - 
(__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W); + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_bf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } @@ -195,21 +195,21 @@ _mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) { +_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A), (__v32hf)(__B)); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W); + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvts_2ph_hf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } @@ -247,19 +247,20 @@ _mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) { (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvts_ph_bf8(__m512h __A) { return 
(__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { +_mm512_mask_cvts_ph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) { +_mm512_maskz_cvts_ph_bf8(__mmask32 __U, __m512h __A) { return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } @@ -281,19 +282,20 @@ _mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) { (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvts_ph_hf8(__m512h __A) { return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { +_mm512_mask_cvts_ph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) { +_mm512_maskz_cvts_ph_hf8(__mmask32 __U, __m512h __A) { return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } diff --git a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h index 5970ab0331444..012a6282b5b18 100644 --- a/clang/lib/Headers/avx10_2_512satcvtdsintrin.h +++ b/clang/lib/Headers/avx10_2_512satcvtdsintrin.h @@ -20,20 +20,21 @@ 
__min_vector_width__(512))) // 512 bit : Double -> Int -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epi32(__m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epi32(__m256i __W, __mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epi32(__mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -55,20 +56,21 @@ _mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) { (const int)(__R))) // 512 bit : Double -> uInt -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epu32(__m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epu32(__mmask8 __U, __m512d __A) { return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U, 
_MM_FROUND_CUR_DIRECTION)); @@ -91,18 +93,19 @@ _mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) { // 512 bit : Double -> Long -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epi64(__m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epi64(__m512i __W, __mmask8 __U, __m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epi64(__mmask8 __U, __m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -125,20 +128,21 @@ _mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) { // 512 bit : Double -> ULong -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_cvtts_pd_epu64(__m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) { +_mm512_mask_cvtts_pd_epu64(__m512i __W, __mmask8 __U, __m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) { +_mm512_maskz_cvtts_pd_epu64(__mmask8 __U, __m512d __A) { return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( 
(__v8df)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -160,20 +164,20 @@ _mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) { (const int)(__R))) // 512 bit: Float -> int -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi32(__m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) { +_mm512_mask_cvtts_ps_epi32(__m512i __W, __mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) { +_mm512_maskz_cvtts_ps_epi32(__mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -195,20 +199,20 @@ _mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) { (__mmask16)(__U), (const int)(__R))) // 512 bit: Float -> uint -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu32(__m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) { +_mm512_mask_cvtts_ps_epu32(__m512i __W, __mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) 
{ +_mm512_maskz_cvtts_ps_epu32(__mmask16 __U, __m512 __A) { return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -230,20 +234,20 @@ _mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) { (__mmask16)(__U), (const int)(__R))) // 512 bit : float -> long -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epi64(__m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) { +_mm512_mask_cvtts_ps_epi64(__m512i __W, __mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) { +_mm512_maskz_cvtts_ps_epi64(__mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); @@ -265,20 +269,20 @@ _mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) { (const int)(__R))) // 512 bit : float -> ulong -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtts_ps_epu64(__m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) { +_mm512_mask_cvtts_ps_epu64(__m512i __W, __mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } 
static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) { +_mm512_maskz_cvtts_ps_epu64(__mmask8 __U, __m256 __A) { return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U, _MM_FROUND_CUR_DIRECTION)); diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h index 7f41deb5212c5..b58e3db8956d6 100644 --- a/clang/lib/Headers/avx10_2_512satcvtintrin.h +++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h @@ -14,286 +14,286 @@ #ifndef __AVX10_2_512SATCVTINTRIN_H #define __AVX10_2_512SATCVTINTRIN_H -#define _mm512_ipcvtbf16_epi8(A) \ +#define _mm512_ipcvts_bf16_epi8(A) \ ((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvtbf16_epi8(W, U, A) \ +#define _mm512_mask_ipcvts_bf16_epi8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epi8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epi8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvtbf16_epi8(U, A) \ +#define _mm512_maskz_ipcvts_bf16_epi8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epi8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epi8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvtbf16_epu8(A) \ +#define _mm512_ipcvts_bf16_epu8(A) \ ((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvtbf16_epu8(W, U, A) \ +#define _mm512_mask_ipcvts_bf16_epu8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epu8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epu8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvtbf16_epu8(U, A) \ +#define _mm512_maskz_ipcvts_bf16_epu8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvtbf16_epu8(A), \ + (__v32hi)_mm512_ipcvts_bf16_epu8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvttbf16_epi8(A) \ +#define 
_mm512_ipcvtts_bf16_epi8(A) \ ((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvttbf16_epi8(W, U, A) \ +#define _mm512_mask_ipcvtts_bf16_epi8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epi8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvttbf16_epi8(U, A) \ +#define _mm512_maskz_ipcvtts_bf16_epi8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epi8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epi8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvttbf16_epu8(A) \ +#define _mm512_ipcvtts_bf16_epu8(A) \ ((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A))) -#define _mm512_mask_ipcvttbf16_epu8(W, U, A) \ +#define _mm512_mask_ipcvtts_bf16_epu8(W, U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epu8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \ (__v32hi)(__m512i)(W))) -#define _mm512_maskz_ipcvttbf16_epu8(U, A) \ +#define _mm512_maskz_ipcvtts_bf16_epu8(U, A) \ ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ - (__v32hi)_mm512_ipcvttbf16_epu8(A), \ + (__v32hi)_mm512_ipcvtts_bf16_epu8(A), \ (__v32hi)_mm512_setzero_si512())) -#define _mm512_ipcvtph_epi8(A) \ +#define _mm512_ipcvts_ph_epi8(A) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtph_epi8(W, U, A) \ +#define _mm512_mask_ipcvts_ph_epi8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtph_epi8(U, A) \ +#define _mm512_maskz_ipcvts_ph_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), 
(__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundph_epi8(A, R) \ +#define _mm512_ipcvts_roundph_epi8(A, R) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)_mm512_setzero_si512(), \ - (__mmask32)-1, (const int)R)) + (__mmask32) - 1, (const int)R)) -#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)_mm512_setzero_si512(), \ (__mmask32)(U), (const int)R)) -#define _mm512_ipcvtph_epu8(A) \ +#define _mm512_ipcvts_ph_epu8(A) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtph_epu8(W, U, A) \ +#define _mm512_mask_ipcvts_ph_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtph_epu8(U, A) \ +#define _mm512_maskz_ipcvts_ph_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundph_epu8(A, R) \ +#define _mm512_ipcvts_roundph_epu8(A, R) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ (const int)R)) -#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ 
(__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \ ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ (const int)R)) -#define _mm512_ipcvtps_epi8(A) \ +#define _mm512_ipcvts_ps_epi8(A) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ - (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtps_epi8(W, U, A) \ +#define _mm512_mask_ipcvts_ps_epi8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtps_epi8(U, A) \ +#define _mm512_maskz_ipcvts_ps_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundps_epi8(A, R) \ +#define _mm512_ipcvts_roundps_epi8(A, R) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ (__v16su)_mm512_setzero_si512(), \ - (__mmask16)-1, (const int)R)) + (__mmask16) - 1, (const int)R)) -#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \ ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ (__v16su)_mm512_setzero_si512(), \ (__mmask16)(U), (const int)R)) -#define _mm512_ipcvtps_epu8(A) \ +#define _mm512_ipcvts_ps_epu8(A) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ - (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512)(A), 
(__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvtps_epu8(W, U, A) \ +#define _mm512_mask_ipcvts_ps_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvtps_epu8(U, A) \ +#define _mm512_maskz_ipcvts_ps_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvt_roundps_epu8(A, R) \ +#define _mm512_ipcvts_roundps_epu8(A, R) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ - (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ (const int)R)) -#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \ +#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) -#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R) \ +#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \ ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ (const int)R)) -#define _mm512_ipcvttph_epi8(A) \ +#define _mm512_ipcvtts_ph_epi8(A) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttph_epi8(W, U, A) \ +#define _mm512_mask_ipcvtts_ph_epi8(W, U, A) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttph_epi8(U, A) \ +#define _mm512_maskz_ipcvtts_ph_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ (__v32hf)(__m512h)(A), 
(__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundph_epi8(A, S) \ +#define _mm512_ipcvtts_roundph_epi8(A, S) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, S) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) -#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ S)) -#define _mm512_ipcvttph_epu8(A) \ +#define _mm512_ipcvtts_ph_epu8(A) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttph_epu8(W, U, A) \ +#define _mm512_mask_ipcvtts_ph_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A), \ (__v32hu)(W), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttph_epu8(U, A) \ +#define _mm512_maskz_ipcvtts_ph_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundph_epu8(A, S) \ +#define _mm512_ipcvtts_roundph_epu8(A, S) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ - (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, S) \ 
((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) -#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ S)) -#define _mm512_ipcvttps_epi8(A) \ +#define _mm512_ipcvtts_ps_epi8(A) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ - (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttps_epi8(W, U, A) \ +#define _mm512_mask_ipcvtts_ps_epi8(W, U, A) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttps_epi8(U, A) \ +#define _mm512_maskz_ipcvtts_ps_epi8(U, A) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundps_epi8(A, S) \ +#define _mm512_ipcvtts_roundps_epi8(A, S) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ - (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) -#define _mm512_maskz_ipcvtt_roundps_epi8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ S)) -#define _mm512_ipcvttps_epu8(A) \ +#define _mm512_ipcvtts_ps_epu8(A) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ - (__v16sf)(__m512h)(A), 
(__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_mask_ipcvttps_epu8(W, U, A) \ +#define _mm512_mask_ipcvtts_ps_epu8(W, U, A) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask((__v16sf)(__m512h)(A), \ (__v16su)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_maskz_ipcvttps_epu8(U, A) \ +#define _mm512_maskz_ipcvtts_ps_epu8(U, A) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm512_ipcvtt_roundps_epu8(A, S) \ +#define _mm512_ipcvtts_roundps_epu8(A, S) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ - (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \ S)) -#define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S) \ +#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) -#define _mm512_maskz_ipcvtt_roundps_epu8(U, A, S) \ +#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, S) \ ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ S)) diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h index 07722090c30ee..bd6ff6099d8a7 100644 --- a/clang/lib/Headers/avx10_2convertintrin.h +++ b/clang/lib/Headers/avx10_2convertintrin.h @@ -63,22 +63,8 @@ _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { _MM_FROUND_CUR_DIRECTION); } -#define _mm256_cvtx_round2ps_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)(-1), (const int)(R))) - -#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \ - 
((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R))) - -#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ - (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()), \ - (__mmask16)(U), (const int)(R))) - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A, - __m128h __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } @@ -117,39 +103,39 @@ _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { +_mm_cvts_biasph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { +_mm_mask_cvts_biasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvts_biasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { +_mm256_cvts_biasph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ 
__m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts_biasph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { +_mm256_maskz_cvts_biasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); @@ -195,39 +181,39 @@ _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { +_mm_cvts_biasph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { +_mm_mask_cvts_biasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvts_biasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { +_mm256_cvts_biasph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ 
__m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvts_biasph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { +_mm256_maskz_cvts_biasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); @@ -270,40 +256,40 @@ _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A, - __m128h __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts_2ph_bf8(__m128h __A, + __m128h __B) { return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvts_2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_bf8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvts_2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_bf8(__A, __B), (__v16qi)(__m128i)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { +_mm256_cvts_2ph_bf8(__m256h __A, __m256h __B) { return 
(__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A), (__v16hf)(__B)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +_mm256_mask_cvts_2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W); + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_bf8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvts_2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask32)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_bf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } @@ -344,40 +330,40 @@ _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A, - __m128h __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts_2ph_hf8(__m128h __A, + __m128h __B) { return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvts_2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_hf8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvts_2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), + (__mmask16)__U, (__v16qi)_mm_cvts_2ph_hf8(__A, __B), 
(__v16qi)(__m128i)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { +_mm256_cvts_2ph_hf8(__m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A), (__v16hf)(__B)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +_mm256_mask_cvts_2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W); + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_hf8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvts_2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask32)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), + (__mmask32)__U, (__v32qi)_mm256_cvts_2ph_hf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } @@ -449,36 +435,37 @@ _mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) { (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts_ph_bf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { +_mm_mask_cvts_ph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) { +_mm_maskz_cvts_ph_bf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, 
(__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvts_ph_bf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { +_mm256_mask_cvts_ph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) { +_mm256_maskz_cvts_ph_bf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } @@ -517,36 +504,37 @@ _mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) { (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts_ph_hf8(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { +_mm_mask_cvts_ph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) { +_mm_maskz_cvts_ph_hf8(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) { 
+static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvts_ph_hf8(__m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { +_mm256_mask_cvts_ph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) { +_mm256_maskz_cvts_ph_hf8(__mmask16 __U, __m256h __A) { return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } diff --git a/clang/lib/Headers/avx10_2minmaxintrin.h b/clang/lib/Headers/avx10_2minmaxintrin.h index 8164d49d89f1f..a59b74dbc54b3 100644 --- a/clang/lib/Headers/avx10_2minmaxintrin.h +++ b/clang/lib/Headers/avx10_2minmaxintrin.h @@ -80,21 +80,6 @@ (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), (__mmask8)(U), _MM_FROUND_NO_EXC)) -#define _mm256_minmax_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) - #define _mm_minmax_ph(A, B, C) \ ((__m128h)__builtin_ia32_vminmaxph128_mask( \ (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ @@ -125,21 +110,6 @@ (__v16hf)(__m256h)(A), 
(__v16hf)(__m256h)(B), (int)(C), \ (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), _MM_FROUND_NO_EXC)) -#define _mm256_minmax_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ - (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) - -#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (C), \ - (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ - (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - #define _mm_minmax_ps(A, B, C) \ ((__m128)__builtin_ia32_vminmaxps128_mask( \ (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \ @@ -170,21 +140,6 @@ (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), _MM_FROUND_NO_EXC)) -#define _mm256_minmax_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) - #define _mm_minmax_sd(A, B, C) \ ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ diff --git a/clang/lib/Headers/avx10_2niintrin.h b/clang/lib/Headers/avx10_2niintrin.h index c91a7b57c7527..992be18f7720a 100644 --- 
a/clang/lib/Headers/avx10_2niintrin.h +++ b/clang/lib/Headers/avx10_2niintrin.h @@ -402,1672 +402,6 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32( (__v8si)_mm256_setzero_si256()); } -/* YMM Rounding */ -#define _mm256_add_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vaddpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_add_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_add_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_add_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vaddph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_add_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_add_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_add_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vaddps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_add_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_add_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_cmp_round_pd_mask(A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)-1, \ - (int)(R))) - -#define 
_mm256_mask_cmp_round_pd_mask(U, A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cmp_round_ph_mask(A, B, P, R) \ - ((__mmask16)__builtin_ia32_vcmpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)-1, \ - (int)(R))) - -#define _mm256_mask_cmp_round_ph_mask(U, A, B, P, R) \ - ((__mmask16)__builtin_ia32_vcmpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cmp_round_ps_mask(A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cmp_round_ps_mask(U, A, B, P, R) \ - ((__mmask8)__builtin_ia32_vcmpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepi32_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask( \ - (__v8si)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepi32_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask((__v8si)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi32_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask( \ - (__v8si)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepi32_ps(A, R) \ - ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundepi32_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask( \ - (__v8si)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi32_ps(U, A, R) \ - ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), 
(int)(R))) - -#define _mm256_cvt_roundpd_epi32(A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epi32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epi32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundpd_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask( \ - (__v4df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundpd_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask((__v4df)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask( \ - (__v4df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundpd_ps(A, R) \ - ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask( \ - (__v4df)(__m256d)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundpd_ps(W, U, A, R) \ - ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask( \ - (__v4df)(__m256d)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_ps(U, A, R) \ - ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask((__v4df)(__m256d)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundpd_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epi64(U, A, R) 
\ - ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundpd_epu32(A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epu32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epu32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundpd_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundpd_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundpd_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundph_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask((__v8hf)(A), (__v8si)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_pd(A, R) \ - ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask( \ - (__v8hf)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundph_pd(W, U, A, R) \ - 
((__m256d)__builtin_ia32_vcvtph2pd256_round_mask((__v8hf)(A), (__v4df)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask( \ - (__v8hf)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtx_roundph_ps(A, R) \ - ((__m256)__builtin_ia32_vcvtph2psx256_round_mask( \ - (__v8hf)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvtx_roundph_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vcvtph2psx256_round_mask((__v8hf)(A), (__v8sf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtx_roundph_ps(U, A, R) \ - ((__m256)__builtin_ia32_vcvtph2psx256_round_mask( \ - (__v8hf)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask((__v8hf)(A), (__v4di)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask((__v8hf)(A), (__v8su)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1), \ - 
(int)(R))) - -#define _mm256_mask_cvt_roundph_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask((__v8hf)(A), (__v4du)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundph_epu16(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epu16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask((__v16hf)(A), (__v16hu)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epu16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cvt_roundph_epi16(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundph_epi16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2w256_round_mask((__v16hf)(A), (__v16hi)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundph_epi16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define 
_mm256_cvt_roundps_pd(A, R) \ - ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ - (__v4sf)(__m128)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ - (__v4sf)(__m128)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ - (__v4sf)(__m128)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_ph(A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ - (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1)) - -/* FIXME: We may use these way in future. -#define _mm256_cvt_roundps_ph(A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ - (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_undefined_si128(), \ - (__mmask8)-1)) -#define _mm256_mask_cvt_roundps_ph(U, W, A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ - (__v8sf)(__m256)(A), (int)(I), (__v8hi)(__m128i)(U), (__mmask8)(W))) -#define _mm256_maskz_cvt_roundps_ph(W, A, I) \ - ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ - (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_setzero_si128(), \ - (__mmask8)(W))) */ - -#define _mm256_cvtx_roundps_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask( \ - (__v8sf)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvtx_roundps_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask((__v8sf)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtx_roundps_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask( \ - (__v8sf)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundps_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define 
_mm256_mask_cvt_roundps_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundps_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundps_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundps_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepi64_pd(A, R) \ - ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ - (__v4di)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundepi64_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ - (__v4di)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi64_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ - (__v4di)(__m256i)(A), (__v4df)_mm256_setzero_pd(), 
(__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepi64_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask( \ - (__v4di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepi64_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask((__v4di)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi64_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask( \ - (__v4di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepi64_ps(A, R) \ - ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask( \ - (__v4di)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundepi64_ps(W, U, A, R) \ - ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask( \ - (__v4di)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi64_ps(U, A, R) \ - ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask((__v4di)(__m256i)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundpd_epi32(A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epi32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundpd_epi32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundpd_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - 
-#define _mm256_maskz_cvtt_roundpd_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundpd_epu32(A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epu32(W, U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundpd_epu32(U, A, R) \ - ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundpd_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundpd_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundpd_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ - (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundph_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask((__v8hf)(A), (__v8si)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask( \ - (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_undefined_si256(), 
(__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask((__v8hf)(A), (__v4di)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask( \ - (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask((__v8hf)(A), (__v8su)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask( \ - (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask((__v8hf)(A), (__v4du)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask( \ - (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvtt_roundph_epu16(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epu16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)(W), (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epu16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ - (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define 
_mm256_cvtt_roundph_epi16(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_undefined_si256(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvtt_roundph_epi16(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2w256_round_mask((__v16hf)(A), (__v16hi)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundph_epi16(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2w256_round_mask( \ - (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epi32(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundps_epi32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epi32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epi64(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundps_epi64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epi64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epu32(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundps_epu32(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), 
(__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epu32(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvtt_roundps_epu64(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvtt_roundps_epu64(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvtt_roundps_epu64(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ - (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepu32_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask( \ - (__v8su)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepu32_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask((__v8su)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu32_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask( \ - (__v8su)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepu32_ps(A, R) \ - ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ - (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundepu32_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ - (__v8su)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu32_ps(U, A, R) \ - ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ - (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepu64_pd(A, R) \ - ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ - (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), 
(__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_cvt_roundepu64_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ - (__v4du)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu64_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ - (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_cvt_roundepu64_ph(A, R) \ - ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask( \ - (__v4du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) - -#define _mm256_mask_cvt_roundepu64_ph(W, U, A, R) \ - ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask((__v4du)(A), (__v8hf)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu64_ph(U, A, R) \ - ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask( \ - (__v4du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepu64_ps(A, R) \ - ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask( \ - (__v4du)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cvt_roundepu64_ps(W, U, A, R) \ - ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask( \ - (__v4du)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu64_ps(U, A, R) \ - ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask((__v4du)(__m256i)(A), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cvt_roundepu16_ph(A, R) \ - ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask( \ - (__v16hu)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundepu16_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask((__v16hu)(A), (__v16hf)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepu16_ph(U, A, R) \ - ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask( \ - (__v16hu)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define 
_mm256_cvt_roundepi16_ph(A, R) \ - ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask( \ - (__v16hi)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1), \ - (int)(R))) - -#define _mm256_mask_cvt_roundepi16_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask((__v16hi)(A), (__v16hf)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_cvt_roundepi16_ph(U, A, R) \ - ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask( \ - (__v16hi)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define _mm256_div_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vdivpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_div_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_div_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_div_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vdivph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_div_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_div_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_div_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vdivps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_div_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_div_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_div_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), 
(__v8sf)_mm256_div_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_fcmadd_round_pch(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fcmadd_round_pch(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fcmadd_round_pch(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fcmadd_round_pch(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfcmaddcph256_round_maskz( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_cmul_round_pch(A, B, R) \ - ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_cmul_round_pch(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_cmul_round_pch(U, A, B, R) \ - ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_fixupimm_round_pd(A, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), 
(int)(R))) - -#define _mm256_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vfixupimmpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), (int)(R))) - -#define _mm256_fixupimm_round_ps(A, B, C, imm, R) \ - ((__m256)__builtin_ia32_vfixupimmps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ - (int)(imm), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ - ((__m256)__builtin_ia32_vfixupimmps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ - ((__m256)__builtin_ia32_vfixupimmps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ - (int)(imm), (__mmask8)(U), (int)(R))) - -#define _mm256_fmadd_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask3( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsub_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsub_round_pd(A, U, B, C, R) \ - 
((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsub_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmadd_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask3_fnmadd_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask3( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fnmadd_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmsub_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_maskz_fnmsub_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmadd_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - 
(__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmsub_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmsub_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fmsub_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fnmadd_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask3_fnmadd_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask3( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fnmadd_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fnmsub_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_maskz_fnmsub_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmadd_round_ps(A, B, C, R) \ - 
((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsub_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsub_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsub_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmadd_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask3_fnmadd_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask3( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fnmadd_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fnmsub_round_ps(A, 
B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_maskz_fnmsub_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmadd_round_pch(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmadd_round_pch(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmadd_round_pch(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmadd_round_pch(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddcph256_round_maskz( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmaddsub_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmaddsub_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmaddsub_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask3( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmaddsub_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), 
(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsubadd_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsubadd_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsubadd_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmaddsub_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmaddsub_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmaddsub_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_fmaddsub_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmsubadd_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_fmsubadd_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define 
_mm256_maskz_fmsubadd_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_fmaddsub_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmaddsub_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmaddsub_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmaddsub_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_fmsubadd_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_fmsubadd_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_fmsubadd_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) -#define _mm256_mask3_fmsub_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmsubadd_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmsubaddpd256_round_mask3( \ - 
(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmadd_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmsub_round_pd(A, U, B, C, R) \ - ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ - (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fnmsub_round_pd(A, B, C, U, R) \ - ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3( \ - -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fmsub_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmsubph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmsubadd_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmsubaddph256_round_mask3( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask_fnmadd_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask_fnmsub_round_ph(A, U, B, C, R) \ - ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ - (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fnmsub_round_ph(A, B, C, U, R) \ - ((__m256h)__builtin_ia32_vfmsubph256_round_mask3( \ - -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_mask3_fmsub_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmsubps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - 
-#define _mm256_mask3_fmsubadd_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmsubaddps256_round_mask3( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmadd_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_fnmsub_round_ps(A, U, B, C, R) \ - ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ - (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask3_fnmsub_round_ps(A, B, C, U, R) \ - ((__m256)__builtin_ia32_vfmsubps256_round_mask3( \ - -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mul_round_pch(A, B, R) \ - ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_mul_round_pch(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_mul_round_pch(U, A, B, R) \ - ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ - (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ - (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R))) - -#define _mm256_getexp_round_pd(A, R) \ - ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_getexp_round_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getexp_round_pd(U, A, R) \ - ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ - 
(int)(R))) - -#define _mm256_getexp_round_ph(A, R) \ - ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, \ - (int)(R))) - -#define _mm256_mask_getexp_round_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_getexp_round_ph(U, A, R) \ - ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), \ - (int)(R))) - -#define _mm256_getexp_round_ps(A, R) \ - ((__m256)__builtin_ia32_vgetexpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, \ - (int)(R))) - -#define _mm256_mask_getexp_round_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_vgetexpps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getexp_round_ps(U, A, R) \ - ((__m256)__builtin_ia32_vgetexpps256_round_mask((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_getmant_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), \ - (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_getmant_round_pd(W, U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), (__v4df)(__m256d)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getmant_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) - -#define _mm256_getmant_round_ph(A, B, C, R) \ - ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ - (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) - -#define 
_mm256_mask_getmant_round_ph(W, U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_getmant_round_ph(U, A, B, C, R) \ - ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ - (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define _mm256_getmant_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), \ - (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_getmant_round_ps(W, U, A, B, C, R) \ - ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_getmant_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) - -#define _mm256_max_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vmaxpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_max_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_max_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_max_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vmaxph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_max_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_max_round_ph(U, A, B, R) \ - 
((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_max_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vmaxps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_max_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_max_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_min_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vminpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_min_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_min_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_min_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vminph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_min_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_min_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_min_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vminps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_min_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)), \ - 
(__v8sf)(__m256)(W))) - -#define _mm256_maskz_min_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_mul_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vmulpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_mul_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_mul_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_mul_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vmulph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_mul_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_mul_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_mul_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vmulps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_mul_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_mul_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_range_round_pd(A, B, C, R) \ - ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)-1, (int)(R))) - -#define 
_mm256_mask_range_round_pd(W, U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_range_round_pd(U, A, B, C, R) \ - ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ - (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) - -#define _mm256_range_round_ps(A, B, C, R) \ - ((__m256)__builtin_ia32_vrangeps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_range_round_ps(W, U, A, B, C, R) \ - ((__m256)__builtin_ia32_vrangeps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_range_round_ps(U, A, B, C, R) \ - ((__m256)__builtin_ia32_vrangeps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ - (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) - -#define _mm256_reduce_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_reduce_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_maskz_reduce_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_mask_reduce_round_ph(W, U, A, imm, R) \ - ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_reduce_round_ph(U, A, imm, R) \ - ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ - 
(__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_reduce_round_ph(A, imm, R) \ - ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_reduce_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vreduceps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_reduce_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_vreduceps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U), \ - (int)(R))) - -#define _mm256_maskz_reduce_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_vreduceps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_roundscale_round_pd(A, imm, R) \ - ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ - (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_undefined_pd(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_roundscale_round_pd(A, B, C, imm, R) \ - ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ - (__v4df)(__m256d)(C), (int)(imm), (__v4df)(__m256d)(A), (__mmask8)(B), \ - (int)(R))) - -#define _mm256_maskz_roundscale_round_pd(A, B, imm, R) \ - ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ - (__v4df)(__m256d)(B), (int)(imm), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(A), (int)(R))) - -#define _mm256_roundscale_round_ph(A, imm, R) \ - ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ - (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(), \ - (__mmask16)-1, (int)(R))) - -#define _mm256_mask_roundscale_round_ph(A, B, C, imm, R) \ - ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ - (__v16hf)(__m256h)(C), (int)(imm), (__v16hf)(__m256h)(A), \ - (__mmask16)(B), (int)(R))) - -#define _mm256_maskz_roundscale_round_ph(A, B, imm, R) \ - 
((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ - (__v16hf)(__m256h)(B), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ - (__mmask16)(A), (int)(R))) - -#define _mm256_roundscale_round_ps(A, imm, R) \ - ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ - (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_undefined_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_roundscale_round_ps(A, B, C, imm, R) \ - ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ - (__v8sf)(__m256)(C), (int)(imm), (__v8sf)(__m256)(A), (__mmask8)(B), \ - (int)(R))) - -#define _mm256_maskz_roundscale_round_ps(A, B, imm, R) \ - ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ - (__v8sf)(__m256)(B), (int)(imm), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(A), (int)(R))) - -#define _mm256_scalef_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), \ - (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) - -#define _mm256_mask_scalef_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_scalef_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ - (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_scalef_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), \ - (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) - -#define _mm256_mask_scalef_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(W), \ - (__mmask16)(U), (int)(R))) - -#define _mm256_maskz_scalef_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ - (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), \ - 
(__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) - -#define _mm256_scalef_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vscalefps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_undefined_ps(), \ - (__mmask8)-1, (int)(R))) - -#define _mm256_mask_scalef_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_vscalefps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(W), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_maskz_scalef_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_vscalefps256_round_mask( \ - (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U), (int)(R))) - -#define _mm256_sqrt_round_pd(A, R) \ - ((__m256d)__builtin_ia32_vsqrtpd256_round((__v4df)(__m256d)(A), (int)(R))) - -#define _mm256_mask_sqrt_round_pd(W, U, A, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_sqrt_round_pd(U, A, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_sqrt_round_ph(A, R) \ - ((__m256h)__builtin_ia32_vsqrtph256_round((__v16hf)(__m256h)(A), (int)(R))) - -#define _mm256_mask_sqrt_round_ph(W, U, A, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_sqrt_round_ph(U, A, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_sqrt_round_ps(A, R) \ - ((__m256)__builtin_ia32_vsqrtps256_round((__v8sf)(__m256)(A), (int)(R))) - -#define _mm256_mask_sqrt_round_ps(W, U, A, R) \ - ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_sqrt_round_ps((A), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_sqrt_round_ps(U, A, R) \ - 
((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ - (__v8sf)_mm256_sqrt_round_ps((A), (R)), \ - (__v8sf)_mm256_setzero_ps())) - -#define _mm256_sub_round_pd(A, B, R) \ - ((__m256d)__builtin_ia32_vsubpd256_round((__v4df)(__m256d)(A), \ - (__v4df)(__m256d)(B), (int)(R))) - -#define _mm256_mask_sub_round_pd(W, U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)), \ - (__v4df)(__m256d)(W))) - -#define _mm256_maskz_sub_round_pd(U, A, B, R) \ - ((__m256d)__builtin_ia32_selectpd_256( \ - (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)), \ - (__v4df)_mm256_setzero_pd())) - -#define _mm256_sub_round_ph(A, B, R) \ - ((__m256h)__builtin_ia32_vsubph256_round((__v16hf)(__m256h)(A), \ - (__v16hf)(__m256h)(B), (int)(R))) - -#define _mm256_mask_sub_round_ph(W, U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)), \ - (__v16hf)(__m256h)(W))) - -#define _mm256_maskz_sub_round_ph(U, A, B, R) \ - ((__m256h)__builtin_ia32_selectph_256( \ - (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)), \ - (__v16hf)_mm256_setzero_ph())) - -#define _mm256_sub_round_ps(A, B, R) \ - ((__m256)__builtin_ia32_vsubps256_round((__v8sf)(__m256)(A), \ - (__v8sf)(__m256)(B), (int)(R))) - -#define _mm256_mask_sub_round_ps(W, U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)), \ - (__v8sf)(__m256)(W))) - -#define _mm256_maskz_sub_round_ps(U, A, B, R) \ - ((__m256)__builtin_ia32_selectps_256( \ - (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)), \ - (__v8sf)_mm256_setzero_ps())) - #undef __DEFAULT_FN_ATTRS256 #undef __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h index 59028436311e7..9124287487212 100644 --- a/clang/lib/Headers/avx10_2satcvtdsintrin.h +++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h @@ -71,175 +71,138 
@@ #endif /* __x86_64__ */ // 128 Bit : Double -> int -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epi32(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)__A, (__v4si)__W, __U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epi32(__mmask16 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epi32(__mmask16 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U)); } // 256 Bit : Double -> int static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epi32(__m256d __A) { +_mm256_cvtts_pd_epi32(__m256d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m256d __A) { +_mm256_mask_cvtts_pd_epi32(__m128i __W, __mmask8 __U, __m256d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epi32(__mmask8 __U, __m256d __A) { +_mm256_maskz_cvtts_pd_epi32(__mmask8 __U, __m256d __A) { return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundpd_epi32(__A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \ - (__mmask8) - 1, (int)(__R))) - 
-#define _mm256_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundpd_epi32(__U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \ - (__mmask8)__U, (int)(__R))) - // 128 Bit : Double -> uint -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epu32(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epu32(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( (__v2df)__A, (__v4si)(__m128i)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epu32(__mmask8 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epu32(__mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U)); } // 256 Bit : Double -> uint static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epu32(__m256d __A) { +_mm256_cvtts_pd_epu32(__m256d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m256d __A) { +_mm256_mask_cvtts_pd_epu32(__m128i __W, __mmask8 __U, __m256d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epu32(__mmask8 __U, __m256d __A) { 
+_mm256_maskz_cvtts_pd_epu32(__mmask8 __U, __m256d __A) { return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundpd_epu32(__A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \ - (__mmask8) - 1, (int)(__R))) - -#define _mm256_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundpd_epu32(__U, __A, __R) \ - ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ - (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \ - (__mmask8)__U, (int)(__R))) - // 128 Bit : Double -> long -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epi64(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epi64(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epi64(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epi64(__mmask8 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epi64(__mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } // 256 Bit : Double -> long static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epi64(__m256d __A) { +_mm256_cvtts_pd_epi64(__m256d __A) { return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epi64(__m256i __W, __mmask8 __U, __m256d __A) { +_mm256_mask_cvtts_pd_epi64(__m256i __W, __mmask8 __U, __m256d __A) { return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epi64(__mmask8 __U, __m256d __A) { +_mm256_maskz_cvtts_pd_epi64(__mmask8 __U, __m256d __A) { return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundpd_epi64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)__A, (__v4di)__W, \ - (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundpd_epi64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R)) - // 128 Bit : Double -> ulong -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtts_pd_epu64(__m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttspd_epu64(__m128i __W, __mmask8 __U, __m128d __A) { +_mm_mask_cvtts_pd_epu64(__m128i __W, __mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) { +_mm_maskz_cvtts_pd_epu64(__mmask8 __U, __m128d __A) { return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } @@ 
-247,105 +210,78 @@ _mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) { // 256 Bit : Double -> ulong static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttspd_epu64(__m256d __A) { +_mm256_cvtts_pd_epu64(__m256d __A) { return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttspd_epu64(__m256i __W, __mmask8 __U, __m256d __A) { +_mm256_mask_cvtts_pd_epu64(__m256i __W, __mmask8 __U, __m256d __A) { return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttspd_epu64(__mmask8 __U, __m256d __A) { +_mm256_maskz_cvtts_pd_epu64(__mmask8 __U, __m256d __A) { return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundpd_epu64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ - (__v4df)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundpd_epu64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ - (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R)) - // 128 Bit : float -> int -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epi32(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epi32(__m128i __W, __mmask8 __U, __m128 __A) { 
+_mm_mask_cvtts_ps_epi32(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)__A, (__v4si)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epi32(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epi32(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( (__v4sf)__A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)__U)); } // 256 Bit : float -> int static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epi32(__m256 __A) { +_mm256_cvtts_ps_epi32(__m256 __A) { return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttsps_epi32(__m256i __W, __mmask8 __U, __m256 __A) { +_mm256_mask_cvtts_ps_epi32(__m256i __W, __mmask8 __U, __m256 __A) { return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epi32(__mmask8 __U, __m256 __A) { +_mm256_maskz_cvtts_ps_epi32(__mmask8 __U, __m256 __A) { return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundps_epi32(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \ - (__mmask8) - 1, (int)(__R))) - -#define _mm256_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundps_epi32(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \ - (__mmask8)__U, (int)(__R))) - // 128 Bit : float -> uint -static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epu32(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epu32(__m128i __W, __mmask8 __U, __m128 __A) { +_mm_mask_cvtts_ps_epu32(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)__A, (__v4si)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epu32(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( (__v4sf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U)); } @@ -353,144 +289,102 @@ _mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) { // 256 Bit : float -> uint static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epu32(__m256 __A) { +_mm256_cvtts_ps_epu32(__m256 __A) { return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttsps_epu32(__m256i __W, __mmask8 __U, __m256 __A) { +_mm256_mask_cvtts_ps_epu32(__m256i __W, __mmask8 __U, __m256 __A) { return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epu32(__mmask8 __U, __m256 __A) { +_mm256_maskz_cvtts_ps_epu32(__mmask8 __U, __m256 __A) { return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundps_epu32(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \ - (__mmask8) - 
1, (int)(__R))) - -#define _mm256_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R))) - -#define _mm256_maskz_cvtts_roundps_epu32(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ - (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \ - (__mmask8)__U, (int)(__R))) - // 128 bit : float -> long -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epi64(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epi64(__m128i __W, __mmask8 __U, __m128 __A) { +_mm_mask_cvtts_ps_epi64(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epi64(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } // 256 bit : float -> long static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epi64(__m128 __A) { +_mm256_cvtts_ps_epi64(__m128 __A) { return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvttsps_epi64(__m256i __W, __mmask8 __U, __m128 __A) { +_mm256_mask_cvtts_ps_epi64(__m256i __W, __mmask8 __U, __m128 __A) { return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) { 
+_mm256_maskz_cvtts_ps_epi64(__mmask8 __U, __m128 __A) { return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundps_epi64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundps_epi64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, \ - (int)__R)) - // 128 bit : float -> ulong -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epu64(__m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvttsps_epu64(__m128i __W, __mmask8 __U, __m128 __A) { +_mm_mask_cvtts_ps_epu64(__m128i __W, __mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) { +_mm_maskz_cvtts_ps_epu64(__mmask8 __U, __m128 __A) { return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); } // 256 bit : float -> ulong static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvttsps_epu64(__m128 __A) { +_mm256_cvtts_ps_epu64(__m128 __A) { return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 
-_mm256_mask_cvttsps_epu64(__m256i __W, __mmask8 __U, __m128 __A) { +_mm256_mask_cvtts_ps_epu64(__m256i __W, __mmask8 __U, __m128 __A) { return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) { +_mm256_maskz_cvtts_ps_epu64(__mmask8 __U, __m128 __A) { return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U, _MM_FROUND_CUR_DIRECTION)); } -#define _mm256_cvtts_roundps_epu64(__A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ - (int)__R)) - -#define _mm256_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) - -#define _mm256_maskz_cvtts_roundps_epu64(__U, __A, __R) \ - ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ - (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, \ - (int)__R)) - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif // __AVX10_2SATCVTDSINTRIN_H diff --git a/clang/lib/Headers/avx10_2satcvtintrin.h b/clang/lib/Headers/avx10_2satcvtintrin.h index d16c60e6382df..cfa5b02fc7d4c 100644 --- a/clang/lib/Headers/avx10_2satcvtintrin.h +++ b/clang/lib/Headers/avx10_2satcvtintrin.h @@ -14,431 +14,319 @@ #ifndef __AVX10_2SATCVTINTRIN_H #define __AVX10_2SATCVTINTRIN_H -#define _mm_ipcvtbf16_epi8(A) \ +#define _mm_ipcvts_bf16_epi8(A) \ ((__m128i)__builtin_ia32_vcvtbf162ibs128((__v8bf)(__m128bh)(A))) -#define _mm_mask_ipcvtbf16_epi8(W, U, A) \ +#define _mm_mask_ipcvts_bf16_epi8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epi8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epi8(A), (__v8hi)(__m128i)(W))) -#define _mm_maskz_ipcvtbf16_epi8(U, A) \ 
+#define _mm_maskz_ipcvts_bf16_epi8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvtbf16_epi8(A), \ + (__v8hi)_mm_ipcvts_bf16_epi8(A), \ (__v8hi)_mm_setzero_si128())) -#define _mm256_ipcvtbf16_epi8(A) \ +#define _mm256_ipcvts_bf16_epi8(A) \ ((__m256i)__builtin_ia32_vcvtbf162ibs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvtbf16_epi8(W, U, A) \ +#define _mm256_mask_ipcvts_bf16_epi8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epi8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epi8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvtbf16_epi8(U, A) \ +#define _mm256_maskz_ipcvts_bf16_epi8(U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epi8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epi8(A), \ (__v16hi)_mm256_setzero_si256())) -#define _mm_ipcvtbf16_epu8(A) \ +#define _mm_ipcvts_bf16_epu8(A) \ ((__m128i)__builtin_ia32_vcvtbf162iubs128((__v8bf)(__m128bh)(A))) -#define _mm_mask_ipcvtbf16_epu8(W, U, A) \ +#define _mm_mask_ipcvts_bf16_epu8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epu8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvts_bf16_epu8(A), (__v8hi)(__m128i)(W))) -#define _mm_maskz_ipcvtbf16_epu8(U, A) \ +#define _mm_maskz_ipcvts_bf16_epu8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvtbf16_epu8(A), \ + (__v8hi)_mm_ipcvts_bf16_epu8(A), \ (__v8hi)_mm_setzero_si128())) -#define _mm256_ipcvtbf16_epu8(A) \ +#define _mm256_ipcvts_bf16_epu8(A) \ ((__m256i)__builtin_ia32_vcvtbf162iubs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvtbf16_epu8(W, U, A) \ +#define _mm256_mask_ipcvts_bf16_epu8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epu8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epu8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvtbf16_epu8(U, A) \ +#define _mm256_maskz_ipcvts_bf16_epu8(U, A) \ 
((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvtbf16_epu8(A), \ + (__v16hi)_mm256_ipcvts_bf16_epu8(A), \ (__v16hi)_mm256_setzero_si256())) -#define _mm_ipcvtph_epi8(A) \ +#define _mm_ipcvts_ph_epi8(A) \ ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtph_epi8(W, U, A) \ +#define _mm_mask_ipcvts_ph_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtph_epi8(U, A) \ +#define _mm_maskz_ipcvts_ph_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtph_epi8(A) \ +#define _mm256_ipcvts_ph_epi8(A) \ ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvtph_epi8(W, U, A) \ +#define _mm256_mask_ipcvts_ph_epi8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ (__v16hu)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvtph_epi8(U, A) \ +#define _mm256_maskz_ipcvts_ph_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) -#define _mm256_ipcvt_roundph_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)_mm256_setzero_si256(), \ - (__mmask16)-1, (const int)R)) - -#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ - (__v16hu)_mm256_setzero_si256(), \ - (__mmask16)(U), (const int)R)) - -#define _mm_ipcvtph_epu8(A) \ +#define 
_mm_ipcvts_ph_epu8(A) \ ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtph_epu8(W, U, A) \ +#define _mm_mask_ipcvts_ph_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtph_epu8(U, A) \ +#define _mm_maskz_ipcvts_ph_epu8(U, A) \ ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtph_epu8(A) \ +#define _mm256_ipcvts_ph_epu8(A) \ ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvtph_epu8(W, U, A) \ +#define _mm256_mask_ipcvts_ph_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \ (__v16hu)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvtph_epu8(U, A) \ +#define _mm256_maskz_ipcvts_ph_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) -#define _mm256_ipcvt_roundph_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (const int)R)) - -#define _mm_ipcvtps_epi8(A) \ +#define _mm_ipcvts_ps_epi8(A) \ ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtps_epi8(W, U, A) \ +#define 
_mm_mask_ipcvts_ps_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtps_epi8(U, A) \ +#define _mm_maskz_ipcvts_ps_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtps_epi8(A) \ +#define _mm256_ipcvts_ps_epi8(A) \ ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvtps_epi8(W, U, A) \ +#define _mm256_mask_ipcvts_ps_epi8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ (__v8su)(W), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvtps_epi8(U, A) \ +#define _mm256_maskz_ipcvts_ps_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_ipcvt_roundps_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)-1, (const int)R)) - -#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)(U), (const int)R)) - -#define _mm_ipcvtps_epu8(A) \ +#define _mm_ipcvts_ps_epu8(A) \ ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvtps_epu8(W, U, A) \ +#define _mm_mask_ipcvts_ps_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvtps_epu8(U, A) \ +#define _mm_maskz_ipcvts_ps_epu8(U, A) \ 
((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvtps_epu8(A) \ +#define _mm256_ipcvts_ps_epu8(A) \ ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvtps_epu8(W, U, A) \ +#define _mm256_mask_ipcvts_ps_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ (__v8su)(W), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvtps_epu8(U, A) \ +#define _mm256_maskz_ipcvts_ps_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_ipcvt_roundps_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)-1, (const int)R)) - -#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)(U), (const int)R)) - -#define _mm_ipcvttbf16_epi8(A) \ +#define _mm_ipcvtts_bf16_epi8(A) \ ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A))) -#define _mm_mask_ipcvttbf16_epi8(W, U, A) \ +#define _mm_mask_ipcvtts_bf16_epi8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epi8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epi8(A), (__v8hi)(__m128i)(W))) -#define _mm_maskz_ipcvttbf16_epi8(U, A) \ +#define _mm_maskz_ipcvtts_bf16_epi8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvttbf16_epi8(A), \ + (__v8hi)_mm_ipcvtts_bf16_epi8(A), \ (__v8hi)_mm_setzero_si128())) -#define 
_mm256_ipcvttbf16_epi8(A) \ +#define _mm256_ipcvtts_bf16_epi8(A) \ ((__m256i)__builtin_ia32_vcvttbf162ibs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvttbf16_epi8(W, U, A) \ +#define _mm256_mask_ipcvtts_bf16_epi8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epi8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvttbf16_epi8(U, A) \ +#define _mm256_maskz_ipcvtts_bf16_epi8(U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epi8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epi8(A), \ (__v16hi)_mm256_setzero_si256())) -#define _mm_ipcvttbf16_epu8(A) \ +#define _mm_ipcvtts_bf16_epu8(A) \ ((__m128i)__builtin_ia32_vcvttbf162iubs128((__v8bf)(__m128bh)(A))) -#define _mm_mask_ipcvttbf16_epu8(W, U, A) \ +#define _mm_mask_ipcvtts_bf16_epu8(W, U, A) \ ((__m128i)__builtin_ia32_selectw_128( \ - (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epu8(A), (__v8hi)(__m128i)(W))) + (__mmask8)(U), (__v8hi)_mm_ipcvtts_bf16_epu8(A), (__v8hi)(__m128i)(W))) -#define _mm_maskz_ipcvttbf16_epu8(U, A) \ +#define _mm_maskz_ipcvtts_bf16_epu8(U, A) \ ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ - (__v8hi)_mm_ipcvttbf16_epu8(A), \ + (__v8hi)_mm_ipcvtts_bf16_epu8(A), \ (__v8hi)_mm_setzero_si128())) -#define _mm256_ipcvttbf16_epu8(A) \ +#define _mm256_ipcvtts_bf16_epu8(A) \ ((__m256i)__builtin_ia32_vcvttbf162iubs256((__v16bf)(__m256bh)(A))) -#define _mm256_mask_ipcvttbf16_epu8(W, U, A) \ +#define _mm256_mask_ipcvtts_bf16_epu8(W, U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epu8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \ (__v16hi)(__m256i)(W))) -#define _mm256_maskz_ipcvttbf16_epu8(U, A) \ +#define _mm256_maskz_ipcvtts_bf16_epu8(U, A) \ ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ - (__v16hi)_mm256_ipcvttbf16_epu8(A), \ + (__v16hi)_mm256_ipcvtts_bf16_epu8(A), \ (__v16hi)_mm256_setzero_si256())) -#define 
_mm_ipcvttph_epi8(A) \ +#define _mm_ipcvtts_ph_epi8(A) \ ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvttph_epi8(W, U, A) \ +#define _mm_mask_ipcvtts_ph_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvttph_epi8(U, A) \ +#define _mm_maskz_ipcvtts_ph_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvttph_epi8(A) \ +#define _mm256_ipcvtts_ph_epi8(A) \ ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvttph_epi8(W, U, A) \ +#define _mm256_mask_ipcvtts_ph_epi8(W, U, A) \ ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \ (__v16hu)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvttph_epi8(U, A) \ +#define _mm256_maskz_ipcvtts_ph_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) -#define _mm256_ipcvtt_roundph_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (const int)R)) - -#define _mm_ipcvttph_epu8(A) \ +#define _mm_ipcvtts_ph_epu8(A) \ ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) -#define 
_mm_mask_ipcvttph_epu8(W, U, A) \ +#define _mm_mask_ipcvtts_ph_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \ (__v8hu)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvttph_epu8(U, A) \ +#define _mm_maskz_ipcvtts_ph_epu8(U, A) \ ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \ (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvttph_epu8(A) \ +#define _mm256_ipcvtts_ph_epu8(A) \ ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvttph_epu8(W, U, A) \ +#define _mm256_mask_ipcvtts_ph_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \ (__v16hu)(W), (__mmask16)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvttph_epu8(U, A) \ +#define _mm256_maskz_ipcvtts_ph_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) -#define _mm256_ipcvtt_roundph_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ - (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ - (const int)R)) - -#define _mm_ipcvttps_epi8(A) \ +#define _mm_ipcvtts_ps_epi8(A) \ ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvttps_epi8(W, U, A) \ +#define _mm_mask_ipcvtts_ps_epi8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) 
-#define _mm_maskz_ipcvttps_epi8(U, A) \ +#define _mm_maskz_ipcvtts_ps_epi8(U, A) \ ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define _mm256_ipcvttps_epi8(A) \ +#define _mm256_ipcvtts_ps_epi8(A) \ ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvttps_epi8(W, U, A) \ +#define _mm256_mask_ipcvtts_ps_epi8(W, U, A) \ ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ (__v8su)(W), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvttps_epi8(U, A) \ +#define _mm256_maskz_ipcvtts_ps_epi8(U, A) \ ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_ipcvtt_roundps_epi8(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)-1, (const int)R)) - -#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ - (__v8su)_mm256_setzero_si256(), \ - (__mmask8)(U), (const int)R)) - -#define _mm_ipcvttps_epu8(A) \ +#define _mm_ipcvtts_ps_epu8(A) \ ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) -#define _mm_mask_ipcvttps_epu8(W, U, A) \ +#define _mm_mask_ipcvtts_ps_epu8(W, U, A) \ ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \ (__v4su)(W), (__mmask8)(U))) -#define _mm_maskz_ipcvttps_epu8(U, A) \ +#define _mm_maskz_ipcvtts_ps_epu8(U, A) \ ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \ (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) -#define 
_mm256_ipcvttps_epu8(A) \ +#define _mm256_ipcvtts_ps_epu8(A) \ ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_mask_ipcvttps_epu8(W, U, A) \ +#define _mm256_mask_ipcvtts_ps_epu8(W, U, A) \ ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \ (__v8su)(W), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) -#define _mm256_maskz_ipcvttps_epu8(U, A) \ +#define _mm256_maskz_ipcvtts_ps_epu8(U, A) \ ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION)) - -#define _mm256_ipcvtt_roundps_epu8(A, R) \ - ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ - (const int)R)) - -#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) - -#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R) \ - ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ - (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ - (const int)R)) #endif // __AVX10_2SATCVTINTRIN_H diff --git a/clang/lib/Interpreter/CMakeLists.txt b/clang/lib/Interpreter/CMakeLists.txt index bf70cdfbee01e..38cf139fa86a6 100644 --- a/clang/lib/Interpreter/CMakeLists.txt +++ b/clang/lib/Interpreter/CMakeLists.txt @@ -27,6 +27,7 @@ add_clang_library(clangInterpreter Interpreter.cpp InterpreterValuePrinter.cpp InterpreterUtils.cpp + RemoteJITUtils.cpp Value.cpp ${WASM_SRC} PARTIAL_SOURCES_INTENDED diff --git a/clang/lib/Interpreter/DeviceOffload.cpp b/clang/lib/Interpreter/DeviceOffload.cpp index 1999d63d1aa04..05625ddedb72f 100644 --- a/clang/lib/Interpreter/DeviceOffload.cpp +++ b/clang/lib/Interpreter/DeviceOffload.cpp @@ -25,16 +25,14 @@ namespace clang { IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( - std::unique_ptr 
DeviceInstance, - CompilerInstance &HostInstance, + CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr FS, llvm::Error &Err, const std::list &PTUs) - : IncrementalParser(*DeviceInstance, Err), PTUs(PTUs), VFS(FS), + : IncrementalParser(DeviceInstance, Err), PTUs(PTUs), VFS(FS), CodeGenOpts(HostInstance.getCodeGenOpts()), - TargetOpts(HostInstance.getTargetOpts()) { + TargetOpts(DeviceInstance.getTargetOpts()) { if (Err) return; - DeviceCI = std::move(DeviceInstance); StringRef Arch = TargetOpts.CPU; if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) { Err = llvm::joinErrors(std::move(Err), llvm::make_error( @@ -44,34 +42,6 @@ IncrementalCUDADeviceParser::IncrementalCUDADeviceParser( } } -llvm::Expected -IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) { - auto PTU = IncrementalParser::Parse(Input); - if (!PTU) - return PTU.takeError(); - - auto PTX = GeneratePTX(); - if (!PTX) - return PTX.takeError(); - - auto Err = GenerateFatbinary(); - if (Err) - return std::move(Err); - - std::string FatbinFileName = - "/incr_module_" + std::to_string(PTUs.size()) + ".fatbin"; - VFS->addFile(FatbinFileName, 0, - llvm::MemoryBuffer::getMemBuffer( - llvm::StringRef(FatbinContent.data(), FatbinContent.size()), - "", false)); - - CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName; - - FatbinContent.clear(); - - return PTU; -} - llvm::Expected IncrementalCUDADeviceParser::GeneratePTX() { auto &PTU = PTUs.back(); std::string Error; @@ -172,6 +142,19 @@ llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() { FatbinContent.append(PTXCode.begin(), PTXCode.end()); + const PartialTranslationUnit &PTU = PTUs.back(); + + std::string FatbinFileName = "/" + PTU.TheModule->getName().str() + ".fatbin"; + + VFS->addFile(FatbinFileName, 0, + llvm::MemoryBuffer::getMemBuffer( + llvm::StringRef(FatbinContent.data(), FatbinContent.size()), + "", false)); + + CodeGenOpts.CudaGpuBinaryFileName = FatbinFileName; + + 
FatbinContent.clear(); + return llvm::Error::success(); } diff --git a/clang/lib/Interpreter/DeviceOffload.h b/clang/lib/Interpreter/DeviceOffload.h index b9a1acab004c3..0b903e31c6799 100644 --- a/clang/lib/Interpreter/DeviceOffload.h +++ b/clang/lib/Interpreter/DeviceOffload.h @@ -28,13 +28,10 @@ class IncrementalCUDADeviceParser : public IncrementalParser { public: IncrementalCUDADeviceParser( - std::unique_ptr DeviceInstance, - CompilerInstance &HostInstance, + CompilerInstance &DeviceInstance, CompilerInstance &HostInstance, llvm::IntrusiveRefCntPtr VFS, llvm::Error &Err, const std::list &PTUs); - llvm::Expected Parse(llvm::StringRef Input) override; - // Generate PTX for the last PTU. llvm::Expected GeneratePTX(); @@ -44,7 +41,6 @@ class IncrementalCUDADeviceParser : public IncrementalParser { ~IncrementalCUDADeviceParser(); protected: - std::unique_ptr DeviceCI; int SMVersion; llvm::SmallString<1024> PTXCode; llvm::SmallVector FatbinContent; diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h index dbd61f0b8b1eb..71d71bc3883e2 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.h +++ b/clang/lib/Interpreter/IncrementalExecutor.h @@ -57,7 +57,7 @@ class IncrementalExecutor { virtual llvm::Error removeModule(PartialTranslationUnit &PTU); virtual llvm::Error runCtors() const; virtual llvm::Error cleanUp(); - llvm::Expected + virtual llvm::Expected getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const; llvm::orc::LLJIT &GetExecutionEngine() { return *Jit; } diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index fa4c1439c9261..eaa5c31cb69e0 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/VirtualFileSystem.h" #ifdef __EMSCRIPTEN__ #include "Wasm.h" +#include #endif // __EMSCRIPTEN__ #include "clang/AST/ASTConsumer.h" @@ -45,6 +46,7 @@ #include "clang/Sema/Lookup.h" 
#include "clang/Serialization/ObjectFilePCHContainerReader.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" #include "llvm/ExecutionEngine/Orc/LLJIT.h" #include "llvm/IR/Module.h" #include "llvm/Support/Errc.h" @@ -415,6 +417,10 @@ Interpreter::Interpreter(std::unique_ptr Instance, Interpreter::~Interpreter() { IncrParser.reset(); Act->FinalizeAction(); + if (DeviceParser) + DeviceParser.reset(); + if (DeviceAct) + DeviceAct->FinalizeAction(); if (IncrExecutor) { if (llvm::Error Err = IncrExecutor->cleanUp()) llvm::report_fatal_error( @@ -450,10 +456,11 @@ const char *const Runtimes = R"( )"; llvm::Expected> -Interpreter::create(std::unique_ptr CI) { +Interpreter::create(std::unique_ptr CI, + std::unique_ptr JB) { llvm::Error Err = llvm::Error::success(); - auto Interp = - std::unique_ptr(new Interpreter(std::move(CI), Err)); + auto Interp = std::unique_ptr( + new Interpreter(std::move(CI), Err, JB ? std::move(JB) : nullptr)); if (Err) return std::move(Err); @@ -462,6 +469,8 @@ Interpreter::create(std::unique_ptr CI) { auto PTU = Interp->Parse(Runtimes); if (!PTU) return PTU.takeError(); + + if (llvm::Error Err = Interp->Execute(*PTU)) return Err; Interp->markUserCodeStart(); Interp->ValuePrintingInfo.resize(4); @@ -480,20 +489,37 @@ Interpreter::createWithCUDA(std::unique_ptr CI, OverlayVFS->pushOverlay(IMVFS); CI->createFileManager(OverlayVFS); - auto Interp = Interpreter::create(std::move(CI)); - if (auto E = Interp.takeError()) - return std::move(E); + llvm::Expected> InterpOrErr = + Interpreter::create(std::move(CI)); + if (!InterpOrErr) + return InterpOrErr; + + std::unique_ptr Interp = std::move(*InterpOrErr); llvm::Error Err = llvm::Error::success(); - auto DeviceParser = std::make_unique( - std::move(DCI), *(*Interp)->getCompilerInstance(), IMVFS, Err, - (*Interp)->PTUs); + llvm::LLVMContext &LLVMCtx = *Interp->TSCtx->getContext(); + + auto DeviceAct = + std::make_unique(*DCI, LLVMCtx, 
Err, *Interp); + if (Err) return std::move(Err); - (*Interp)->DeviceParser = std::move(DeviceParser); + Interp->DeviceAct = std::move(DeviceAct); + + DCI->ExecuteAction(*Interp->DeviceAct); + + Interp->DeviceCI = std::move(DCI); - return Interp; + auto DeviceParser = std::make_unique( + *Interp->DeviceCI, *Interp->getCompilerInstance(), IMVFS, Err, + Interp->PTUs); + + if (Err) + return std::move(Err); + + Interp->DeviceParser = std::move(DeviceParser); + return std::move(Interp); } const CompilerInstance *Interpreter::getCompilerInstance() const { @@ -531,15 +557,17 @@ size_t Interpreter::getEffectivePTUSize() const { PartialTranslationUnit & Interpreter::RegisterPTU(TranslationUnitDecl *TU, - std::unique_ptr M /*={}*/) { + std::unique_ptr M /*={}*/, + IncrementalAction *Action) { PTUs.emplace_back(PartialTranslationUnit()); PartialTranslationUnit &LastPTU = PTUs.back(); LastPTU.TUPart = TU; if (!M) - M = GenModule(); + M = GenModule(Action); - assert((!getCodeGen() || M) && "Must have a llvm::Module at this point"); + assert((!getCodeGen(Action) || M) && + "Must have a llvm::Module at this point"); LastPTU.TheModule = std::move(M); LLVM_DEBUG(llvm::dbgs() << "compile-ptu " << PTUs.size() - 1 @@ -559,6 +587,16 @@ Interpreter::Parse(llvm::StringRef Code) { llvm::Expected DeviceTU = DeviceParser->Parse(Code); if (auto E = DeviceTU.takeError()) return std::move(E); + + RegisterPTU(*DeviceTU, nullptr, DeviceAct.get()); + + llvm::Expected PTX = DeviceParser->GeneratePTX(); + if (!PTX) + return PTX.takeError(); + + llvm::Error Err = DeviceParser->GenerateFatbinary(); + if (Err) + return std::move(Err); } // Tell the interpreter sliently ignore unused expressions since value @@ -583,6 +621,25 @@ createJITTargetMachineBuilder(const std::string &TT) { return llvm::orc::JITTargetMachineBuilder(llvm::Triple(TT)); } +llvm::Expected> +Interpreter::createLLJITBuilder( + std::unique_ptr EPC, + llvm::StringRef OrcRuntimePath) { + const std::string &TT = 
EPC->getTargetTriple().getTriple(); + auto JTMB = createJITTargetMachineBuilder(TT); + if (!JTMB) + return JTMB.takeError(); + auto JB = IncrementalExecutor::createDefaultJITBuilder(std::move(*JTMB)); + if (!JB) + return JB.takeError(); + + (*JB)->setExecutorProcessControl(std::move(EPC)); + (*JB)->setPlatformSetUp( + llvm::orc::ExecutorNativePlatform(OrcRuntimePath.str())); + + return std::move(*JB); +} + llvm::Error Interpreter::CreateExecutor() { if (IncrExecutor) return llvm::make_error("Operation failed. " @@ -711,24 +768,34 @@ llvm::Error Interpreter::Undo(unsigned N) { } llvm::Error Interpreter::LoadDynamicLibrary(const char *name) { +#ifdef __EMSCRIPTEN__ + void *handle = dlopen(name, RTLD_NOW | RTLD_GLOBAL); + if (!handle) { + llvm::errs() << dlerror() << '\n'; + return llvm::make_error("Failed to load dynamic library", + llvm::inconvertibleErrorCode()); + } +#else auto EE = getExecutionEngine(); if (!EE) return EE.takeError(); - auto &DL = EE->getDataLayout(); - - if (auto DLSG = llvm::orc::DynamicLibrarySearchGenerator::Load( - name, DL.getGlobalPrefix())) - EE->getMainJITDylib().addGenerator(std::move(*DLSG)); + if (auto DLSG = llvm::orc::EPCDynamicLibrarySearchGenerator::Load( + EE->getExecutionSession(), name)) + // FIXME: Eventually we should put each library in its own JITDylib and + // turn off process symbols by default. + EE->getProcessSymbolsJITDylib()->addGenerator(std::move(*DLSG)); else return DLSG.takeError(); +#endif return llvm::Error::success(); } -std::unique_ptr Interpreter::GenModule() { +std::unique_ptr +Interpreter::GenModule(IncrementalAction *Action) { static unsigned ID = 0; - if (CodeGenerator *CG = getCodeGen()) { + if (CodeGenerator *CG = getCodeGen(Action)) { // Clang's CodeGen is designed to work with a single llvm::Module. 
In many // cases for convenience various CodeGen parts have a reference to the // llvm::Module (TheModule or Module) which does not change when a new @@ -750,8 +817,10 @@ std::unique_ptr Interpreter::GenModule() { return nullptr; } -CodeGenerator *Interpreter::getCodeGen() const { - FrontendAction *WrappedAct = Act->getWrapped(); +CodeGenerator *Interpreter::getCodeGen(IncrementalAction *Action) const { + if (!Action) + Action = Act.get(); + FrontendAction *WrappedAct = Action->getWrapped(); if (!WrappedAct->hasIRSupport()) return nullptr; return static_cast(WrappedAct)->getCodeGenerator(); diff --git a/clang/lib/Interpreter/RemoteJITUtils.cpp b/clang/lib/Interpreter/RemoteJITUtils.cpp new file mode 100644 index 0000000000000..095a3a3d49ae9 --- /dev/null +++ b/clang/lib/Interpreter/RemoteJITUtils.cpp @@ -0,0 +1,297 @@ +//===-- RemoteJITUtils.cpp - Utilities for remote-JITing --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// FIXME: Unify this code with similar functionality in llvm-jitlink. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Interpreter/RemoteJITUtils.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" +#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" +#include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" + +#ifdef LLVM_ON_UNIX +#include +#include +#include +#include +#include +#endif // LLVM_ON_UNIX + +using namespace llvm; +using namespace llvm::orc; + +static std::atomic LaunchedExecutorPID{-1}; + +Expected getSlabAllocSize(StringRef SizeString) { + SizeString = SizeString.trim(); + + uint64_t Units = 1024; + + if (SizeString.ends_with_insensitive("kb")) + SizeString = SizeString.drop_back(2).rtrim(); + else if (SizeString.ends_with_insensitive("mb")) { + Units = 1024 * 1024; + SizeString = SizeString.drop_back(2).rtrim(); + } else if (SizeString.ends_with_insensitive("gb")) { + Units = 1024 * 1024 * 1024; + SizeString = SizeString.drop_back(2).rtrim(); + } + + uint64_t SlabSize = 0; + if (SizeString.getAsInteger(10, SlabSize)) + return make_error("Invalid numeric format for slab size", + inconvertibleErrorCode()); + + return SlabSize * Units; +} + +Expected> +createSharedMemoryManager(SimpleRemoteEPC &SREPC, + StringRef SlabAllocateSizeString) { + SharedMemoryMapper::SymbolAddrs SAs; + if (auto Err = SREPC.getBootstrapSymbols( + {{SAs.Instance, rt::ExecutorSharedMemoryMapperServiceInstanceName}, + {SAs.Reserve, + rt::ExecutorSharedMemoryMapperServiceReserveWrapperName}, + {SAs.Initialize, + rt::ExecutorSharedMemoryMapperServiceInitializeWrapperName}, + {SAs.Deinitialize, + 
rt::ExecutorSharedMemoryMapperServiceDeinitializeWrapperName}, + {SAs.Release, + rt::ExecutorSharedMemoryMapperServiceReleaseWrapperName}})) + return std::move(Err); + +#ifdef _WIN32 + size_t SlabSize = 1024 * 1024; +#else + size_t SlabSize = 1024 * 1024 * 1024; +#endif + + if (!SlabAllocateSizeString.empty()) { + if (auto S = getSlabAllocSize(SlabAllocateSizeString)) + SlabSize = *S; + else + return S.takeError(); + } + + return MapperJITLinkMemoryManager::CreateWithMapper( + SlabSize, SREPC, SAs); +} + +Expected> +launchExecutor(StringRef ExecutablePath, bool UseSharedMemory, + llvm::StringRef SlabAllocateSizeString, int stdin_fd = 0, int stdout_fd = 1, int stderr_fd = 2) { +#ifndef LLVM_ON_UNIX + return make_error("-" + ExecutablePath + + " not supported on non-unix platforms", + inconvertibleErrorCode()); +#elif !LLVM_ENABLE_THREADS + return make_error( + "-" + ExecutablePath + + " requires threads, but LLVM was built with " + "LLVM_ENABLE_THREADS=Off", + inconvertibleErrorCode()); +#else + if (!sys::fs::can_execute(ExecutablePath)) + return make_error( + formatv("Specified executor invalid: {0}", ExecutablePath), + inconvertibleErrorCode()); + + constexpr int ReadEnd = 0; + constexpr int WriteEnd = 1; + + // Pipe FDs for RPC + int ToExecutor[2]; + int FromExecutor[2]; + + pid_t ChildPID; + + // Create pipes for RPC communication + if (pipe(ToExecutor) != 0 || pipe(FromExecutor) != 0) + return make_error("Unable to create RPC pipes", + inconvertibleErrorCode()); + + ChildPID = fork(); + + if (ChildPID == 0) { + // In the child... 
+ + // Close parent ends of RPC pipes + close(ToExecutor[WriteEnd]); + close(FromExecutor[ReadEnd]); + + // Redirect stdin, stdout, and stderr to provided file descriptors + if (stdin_fd != 0) { + dup2(stdin_fd, STDIN_FILENO); + if (stdin_fd != STDIN_FILENO) + close(stdin_fd); + } + + if (stdout_fd != 1) { + dup2(stdout_fd, STDOUT_FILENO); + if (stdout_fd != STDOUT_FILENO) + close(stdout_fd); + + setvbuf(stdout, NULL, _IONBF, 0); + } + + if (stderr_fd != 2) { + dup2(stderr_fd, STDERR_FILENO); + if (stderr_fd != STDERR_FILENO) + close(stderr_fd); + + setvbuf(stderr, NULL, _IONBF, 0); + } + + // Execute the child process + std::unique_ptr ExecutorPath, FDSpecifier; + { + ExecutorPath = std::make_unique(ExecutablePath.size() + 1); + strcpy(ExecutorPath.get(), ExecutablePath.data()); + + std::string FDSpecifierStr("filedescs="); + FDSpecifierStr += utostr(ToExecutor[ReadEnd]); + FDSpecifierStr += ','; + FDSpecifierStr += utostr(FromExecutor[WriteEnd]); + FDSpecifier = std::make_unique(FDSpecifierStr.size() + 1); + strcpy(FDSpecifier.get(), FDSpecifierStr.c_str()); + } + + char *const Args[] = {ExecutorPath.get(), FDSpecifier.get(), nullptr}; + int RC = execvp(ExecutorPath.get(), Args); + if (RC != 0) { + errs() << "unable to launch out-of-process executor \"" + << ExecutorPath.get() << "\"\n"; + exit(1); + } + } else { + LaunchedExecutorPID = ChildPID; + } + + // Close child ends of RPC pipes + close(ToExecutor[ReadEnd]); + close(FromExecutor[WriteEnd]); + + auto S = SimpleRemoteEPC::Setup(); + if (UseSharedMemory) + S.CreateMemoryManager = [SlabAllocateSizeString](SimpleRemoteEPC &EPC) { + return createSharedMemoryManager(EPC, SlabAllocateSizeString); + }; + + auto EPC = SimpleRemoteEPC::Create( + std::make_unique(std::nullopt), + std::move(S), FromExecutor[ReadEnd], ToExecutor[WriteEnd]); + + return EPC; +#endif +} + +#if LLVM_ON_UNIX && LLVM_ENABLE_THREADS + +static Expected connectTCPSocketImpl(std::string Host, + std::string PortStr) { + addrinfo *AI; + addrinfo 
Hints{}; + Hints.ai_family = AF_INET; + Hints.ai_socktype = SOCK_STREAM; + Hints.ai_flags = AI_NUMERICSERV; + + if (int EC = getaddrinfo(Host.c_str(), PortStr.c_str(), &Hints, &AI)) + return make_error( + formatv("address resolution failed ({0})", gai_strerror(EC)), + inconvertibleErrorCode()); + // Cycle through the returned addrinfo structures and connect to the first + // reachable endpoint. + int SockFD; + addrinfo *Server; + for (Server = AI; Server != nullptr; Server = Server->ai_next) { + // socket might fail, e.g. if the address family is not supported. Skip to + // the next addrinfo structure in such a case. + if ((SockFD = socket(AI->ai_family, AI->ai_socktype, AI->ai_protocol)) < 0) + continue; + + // If connect returns null, we exit the loop with a working socket. + if (connect(SockFD, Server->ai_addr, Server->ai_addrlen) == 0) + break; + + close(SockFD); + } + freeaddrinfo(AI); + + // If we reached the end of the loop without connecting to a valid endpoint, + // dump the last error that was logged in socket() or connect(). + if (Server == nullptr) + return make_error("invalid hostname", + inconvertibleErrorCode()); + + return SockFD; +} +#endif + +Expected> +connectTCPSocket(StringRef NetworkAddress, bool UseSharedMemory, + llvm::StringRef SlabAllocateSizeString) { +#ifndef LLVM_ON_UNIX + // FIXME: Add TCP support for Windows. + return make_error("-" + NetworkAddress + + " not supported on non-unix platforms", + inconvertibleErrorCode()); +#elif !LLVM_ENABLE_THREADS + // Out of process mode using SimpleRemoteEPC depends on threads. 
+ return make_error( + "-" + NetworkAddress + + " requires threads, but LLVM was built with " + "LLVM_ENABLE_THREADS=Off", + inconvertibleErrorCode()); +#else + + auto CreateErr = [NetworkAddress](Twine Details) { + return make_error( + formatv("Failed to connect TCP socket '{0}': {1}", NetworkAddress, + Details), + inconvertibleErrorCode()); + }; + + StringRef Host, PortStr; + std::tie(Host, PortStr) = NetworkAddress.split(':'); + if (Host.empty()) + return CreateErr("Host name for -" + NetworkAddress + " can not be empty"); + if (PortStr.empty()) + return CreateErr("Port number in -" + NetworkAddress + " can not be empty"); + int Port = 0; + if (PortStr.getAsInteger(10, Port)) + return CreateErr("Port number '" + PortStr + "' is not a valid integer"); + + Expected SockFD = connectTCPSocketImpl(Host.str(), PortStr.str()); + if (!SockFD) + return SockFD.takeError(); + + auto S = SimpleRemoteEPC::Setup(); + if (UseSharedMemory) + S.CreateMemoryManager = [SlabAllocateSizeString](SimpleRemoteEPC &EPC) { + return createSharedMemoryManager(EPC, SlabAllocateSizeString); + }; + + return SimpleRemoteEPC::Create( + std::make_unique(std::nullopt), + std::move(S), *SockFD, *SockFD); +#endif +} + +pid_t getLastLaunchedExecutorPID() { + return LaunchedExecutorPID; +} diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp index aa10b160ccf84..74c83169ced6c 100644 --- a/clang/lib/Interpreter/Wasm.cpp +++ b/clang/lib/Interpreter/Wasm.cpp @@ -144,6 +144,19 @@ llvm::Error WasmIncrementalExecutor::cleanUp() { return llvm::Error::success(); } +llvm::Expected +WasmIncrementalExecutor::getSymbolAddress(llvm::StringRef Name, + SymbolNameKind NameKind) const { + void *Sym = dlsym(RTLD_DEFAULT, Name.str().c_str()); + if (!Sym) { + return llvm::make_error("dlsym failed for symbol: " + + Name.str(), + llvm::inconvertibleErrorCode()); + } + + return llvm::orc::ExecutorAddr::fromPtr(Sym); +} + WasmIncrementalExecutor::~WasmIncrementalExecutor() = default; } // namespace 
clang \ No newline at end of file diff --git a/clang/lib/Interpreter/Wasm.h b/clang/lib/Interpreter/Wasm.h index 4632613326d39..9a752934e3185 100644 --- a/clang/lib/Interpreter/Wasm.h +++ b/clang/lib/Interpreter/Wasm.h @@ -29,6 +29,9 @@ class WasmIncrementalExecutor : public IncrementalExecutor { llvm::Error removeModule(PartialTranslationUnit &PTU) override; llvm::Error runCtors() const override; llvm::Error cleanUp() override; + llvm::Expected + getSymbolAddress(llvm::StringRef Name, + SymbolNameKind NameKind) const override; ~WasmIncrementalExecutor() override; }; diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 0cadede51a9b3..2fab1dfed4a00 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -2237,8 +2237,6 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { if (PP.isCodeCompletionReached() && !CalledSignatureHelp) RunSignatureHelp(); LHS = ExprError(); - } else if (!HasError && HasTrailingComma) { - Diag(Tok, diag::err_expected_expression); } else if (LHS.isInvalid()) { for (auto &E : ArgExprs) Actions.CorrectDelayedTyposInExpr(E); @@ -3738,7 +3736,6 @@ bool Parser::ParseExpressionList(SmallVectorImpl &Exprs, if (Tok.is(tok::r_paren)) { if (HasTrailingComma) *HasTrailingComma = true; - break; } } if (SawError) { diff --git a/clang/lib/Parse/ParseInit.cpp b/clang/lib/Parse/ParseInit.cpp index 63b1d7bd9db53..471b3eaf28287 100644 --- a/clang/lib/Parse/ParseInit.cpp +++ b/clang/lib/Parse/ParseInit.cpp @@ -445,7 +445,7 @@ ExprResult Parser::createEmbedExpr() { Context.MakeIntValue(Str.size(), Context.getSizeType()); QualType ArrayTy = Context.getConstantArrayType( Ty, ArraySize, nullptr, ArraySizeModifier::Normal, 0); - return StringLiteral::Create(Context, Str, StringLiteralKind::Ordinary, + return StringLiteral::Create(Context, Str, StringLiteralKind::Binary, false, ArrayTy, StartLoc); }; diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 6907fa91e28c2..27b5eb5f2c773 
100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -14,6 +14,7 @@ #include "CheckExprLifetime.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/Attr.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" @@ -219,6 +220,10 @@ void Sema::inferGslOwnerPointerAttribute(CXXRecordDecl *Record) { void Sema::inferLifetimeBoundAttribute(FunctionDecl *FD) { if (FD->getNumParams() == 0) return; + // Skip void returning functions (except constructors). This can occur in + // cases like 'as_const'. + if (!isa(FD) && FD->getReturnType()->isVoidType()) + return; if (unsigned BuiltinID = FD->getBuiltinID()) { // Add lifetime attribute to std::move, std::fowrard et al. diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 54bc52fa2ac40..d0d44e8899133 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -1151,10 +1151,33 @@ static unsigned int checkCastFunctionType(Sema &Self, const ExprResult &SrcExpr, return false; }; + auto IsFarProc = [](const FunctionType *T) { + // The definition of FARPROC depends on the platform in terms of its return + // type, which could be int, or long long, etc. We'll look for a source + // signature for: (*)() and call that "close enough" to + // FARPROC to be sufficient to silence the diagnostic. This is similar to + // how we allow casts between function pointers and void * for supporting + // dlsym. + // Note: we could check for __stdcall on the function pointer as well, but + // that seems like splitting hairs. 
+ if (!T->getReturnType()->isIntegerType()) + return false; + if (const auto *PT = T->getAs()) + return !PT->isVariadic() && PT->getNumParams() == 0; + return true; + }; + // Skip if either function type is void(*)(void) if (IsVoidVoid(SrcFTy) || IsVoidVoid(DstFTy)) return 0; + // On Windows, GetProcAddress() returns a FARPROC, which is a typedef for a + // function pointer type (with no prototype, in C). We don't want to diagnose + // this case so we don't diagnose idiomatic code on Windows. + if (Self.getASTContext().getTargetInfo().getTriple().isOSWindows() && + IsFarProc(SrcFTy)) + return 0; + // Check return type. if (!argTypeIsABIEquivalent(SrcFTy->getReturnType(), DstFTy->getReturnType(), Self.Context)) diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index a7b609f7f3ce4..8adebccde042c 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -702,75 +702,6 @@ bool Sema::CheckConstraintSatisfaction(const Expr *ConstraintExpr, .isInvalid(); } -bool Sema::addInstantiatedCapturesToScope( - FunctionDecl *Function, const FunctionDecl *PatternDecl, - LocalInstantiationScope &Scope, - const MultiLevelTemplateArgumentList &TemplateArgs) { - const auto *LambdaClass = cast(Function)->getParent(); - const auto *LambdaPattern = cast(PatternDecl)->getParent(); - - unsigned Instantiated = 0; - - // FIXME: This is a workaround for not having deferred lambda body - // instantiation. - // When transforming a lambda's body, if we encounter another call to a - // nested lambda that contains a constraint expression, we add all of the - // outer lambda's instantiated captures to the current instantiation scope to - // facilitate constraint evaluation. However, these captures don't appear in - // the CXXRecordDecl until after the lambda expression is rebuilt, so we - // pull them out from the corresponding LSI. 
- LambdaScopeInfo *InstantiatingScope = nullptr; - if (LambdaPattern->capture_size() && !LambdaClass->capture_size()) { - for (FunctionScopeInfo *Scope : llvm::reverse(FunctionScopes)) { - auto *LSI = dyn_cast(Scope); - if (!LSI || - LSI->CallOperator->getTemplateInstantiationPattern() != PatternDecl) - continue; - InstantiatingScope = LSI; - break; - } - assert(InstantiatingScope); - } - - auto AddSingleCapture = [&](const ValueDecl *CapturedPattern, - unsigned Index) { - ValueDecl *CapturedVar = - InstantiatingScope ? InstantiatingScope->Captures[Index].getVariable() - : LambdaClass->getCapture(Index)->getCapturedVar(); - assert(CapturedVar->isInitCapture()); - Scope.InstantiatedLocal(CapturedPattern, CapturedVar); - }; - - for (const LambdaCapture &CapturePattern : LambdaPattern->captures()) { - if (!CapturePattern.capturesVariable()) { - Instantiated++; - continue; - } - ValueDecl *CapturedPattern = CapturePattern.getCapturedVar(); - - if (!CapturedPattern->isInitCapture()) { - Instantiated++; - continue; - } - - if (!CapturedPattern->isParameterPack()) { - AddSingleCapture(CapturedPattern, Instantiated++); - } else { - Scope.MakeInstantiatedLocalArgPack(CapturedPattern); - SmallVector Unexpanded; - SemaRef.collectUnexpandedParameterPacks( - dyn_cast(CapturedPattern)->getInit(), Unexpanded); - auto NumArgumentsInExpansion = - getNumArgumentsInExpansionFromUnexpanded(Unexpanded, TemplateArgs); - if (!NumArgumentsInExpansion) - continue; - for (unsigned Arg = 0; Arg < *NumArgumentsInExpansion; ++Arg) - AddSingleCapture(CapturedPattern, Instantiated++); - } - } - return false; -} - bool Sema::SetupConstraintScope( FunctionDecl *FD, std::optional> TemplateArgs, const MultiLevelTemplateArgumentList &MLTAL, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f70401ea33b4a..41d5f9f2f3420 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8145,7 +8145,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( 
(D.getCXXScopeSpec().isSet() && DC && DC->isRecord() && DC->isDependentContext()) ? TPC_ClassTemplateMember - : TPC_VarTemplate)) + : TPC_Other)) NewVD->setInvalidDecl(); // If we are providing an explicit specialization of a static variable diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index e4e3bbad1f520..85de46c9adab4 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -13533,7 +13533,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S, AccessSpecifier AS, // Merge any previous default template arguments into our parameters, // and check the parameter list. if (CheckTemplateParameterList(TemplateParams, OldTemplateParams, - TPC_TypeAliasTemplate)) + TPC_Other)) return nullptr; TypeAliasTemplateDecl *NewDecl = diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e253e3a17328f..23d0f9532d4f8 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1519,7 +1519,7 @@ static void checkEnumArithmeticConversions(Sema &S, Expr *LHS, Expr *RHS, // In C++ 26, usual arithmetic conversions between 2 different enum types // are ill-formed. 
if (S.getLangOpts().CPlusPlus26) - DiagID = diag::err_conv_mixed_enum_types_cxx26; + DiagID = diag::warn_conv_mixed_enum_types_cxx26; else if (!L->castAs()->getDecl()->hasNameForLinkage() || !R->castAs()->getDecl()->hasNameForLinkage()) { // If either enumeration type is unnamed, it's less likely that the diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 1e39d69e8b230..c6621402adfc9 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4143,6 +4143,7 @@ Sema::IsStringLiteralToNonConstPointerConversion(Expr *From, QualType ToType) { // We don't allow UTF literals to be implicitly converted break; case StringLiteralKind::Ordinary: + case StringLiteralKind::Binary: return (ToPointeeType->getKind() == BuiltinType::Char_U || ToPointeeType->getKind() == BuiltinType::Char_S); case StringLiteralKind::Wide: diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 37796758960cd..6e9ed875b50c5 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -105,6 +105,7 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, return SIF_None; [[fallthrough]]; case StringLiteralKind::Ordinary: + case StringLiteralKind::Binary: // char array can be initialized with a narrow string.
// Only allow char x[] = "foo"; not char x[] = L"foo"; if (ElemTy->isCharType()) diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index ceb32ee15dfa3..981856fbf25a7 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -2389,6 +2389,74 @@ static FunctionDecl *getPatternFunctionDecl(FunctionDecl *FD) { return FTD->getTemplatedDecl(); } +bool Sema::addInstantiatedCapturesToScope( + FunctionDecl *Function, const FunctionDecl *PatternDecl, + LocalInstantiationScope &Scope, + const MultiLevelTemplateArgumentList &TemplateArgs) { + const auto *LambdaClass = cast(Function)->getParent(); + const auto *LambdaPattern = cast(PatternDecl)->getParent(); + + unsigned Instantiated = 0; + + // FIXME: This is a workaround for not having deferred lambda body + // instantiation. + // When transforming a lambda's body, if we encounter another call to a + // nested lambda that contains a constraint expression, we add all of the + // outer lambda's instantiated captures to the current instantiation scope to + // facilitate constraint evaluation. However, these captures don't appear in + // the CXXRecordDecl until after the lambda expression is rebuilt, so we + // pull them out from the corresponding LSI. + LambdaScopeInfo *InstantiatingScope = nullptr; + if (LambdaPattern->capture_size() && !LambdaClass->capture_size()) { + for (FunctionScopeInfo *Scope : llvm::reverse(FunctionScopes)) { + auto *LSI = dyn_cast(Scope); + if (!LSI || getPatternFunctionDecl(LSI->CallOperator) != PatternDecl) + continue; + InstantiatingScope = LSI; + break; + } + assert(InstantiatingScope); + } + + auto AddSingleCapture = [&](const ValueDecl *CapturedPattern, + unsigned Index) { + ValueDecl *CapturedVar = + InstantiatingScope ? 
InstantiatingScope->Captures[Index].getVariable() + : LambdaClass->getCapture(Index)->getCapturedVar(); + assert(CapturedVar->isInitCapture()); + Scope.InstantiatedLocal(CapturedPattern, CapturedVar); + }; + + for (const LambdaCapture &CapturePattern : LambdaPattern->captures()) { + if (!CapturePattern.capturesVariable()) { + Instantiated++; + continue; + } + ValueDecl *CapturedPattern = CapturePattern.getCapturedVar(); + + if (!CapturedPattern->isInitCapture()) { + Instantiated++; + continue; + } + + if (!CapturedPattern->isParameterPack()) { + AddSingleCapture(CapturedPattern, Instantiated++); + } else { + Scope.MakeInstantiatedLocalArgPack(CapturedPattern); + SmallVector Unexpanded; + SemaRef.collectUnexpandedParameterPacks( + dyn_cast(CapturedPattern)->getInit(), Unexpanded); + auto NumArgumentsInExpansion = + getNumArgumentsInExpansionFromUnexpanded(Unexpanded, TemplateArgs); + if (!NumArgumentsInExpansion) + continue; + for (unsigned Arg = 0; Arg < *NumArgumentsInExpansion; ++Arg) + AddSingleCapture(CapturedPattern, Instantiated++); + } + } + return false; +} + Sema::LambdaScopeForCallOperatorInstantiationRAII:: LambdaScopeForCallOperatorInstantiationRAII( Sema &SemaRef, FunctionDecl *FD, MultiLevelTemplateArgumentList MLTAL, diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 938671055333c..1c555b38277b0 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1591,8 +1591,16 @@ NamedDecl *Sema::ActOnTemplateTemplateParameter( assert(S->isTemplateParamScope() && "Template template parameter not in template parameter scope!"); - // Construct the parameter object. bool IsParameterPack = EllipsisLoc.isValid(); + + bool Invalid = false; + if (CheckTemplateParameterList( + Params, + /*OldParams=*/nullptr, + IsParameterPack ? TPC_TemplateTemplateParameterPack : TPC_Other)) + Invalid = true; + + // Construct the parameter object. 
TemplateTemplateParmDecl *Param = TemplateTemplateParmDecl::Create( Context, Context.getTranslationUnitDecl(), NameLoc.isInvalid() ? TmpLoc : NameLoc, Depth, Position, IsParameterPack, @@ -1615,9 +1623,12 @@ NamedDecl *Sema::ActOnTemplateTemplateParameter( if (Params->size() == 0) { Diag(Param->getLocation(), diag::err_template_template_parm_no_parms) << SourceRange(Params->getLAngleLoc(), Params->getRAngleLoc()); - Param->setInvalidDecl(); + Invalid = true; } + if (Invalid) + Param->setInvalidDecl(); + // C++0x [temp.param]p9: // A default template-argument may be specified for any kind of // template-parameter that is not a template parameter pack. @@ -2066,7 +2077,7 @@ DeclResult Sema::CheckClassTemplate( SemanticContext->isDependentContext()) ? TPC_ClassTemplateMember : TUK == TagUseKind::Friend ? TPC_FriendClassTemplate - : TPC_ClassTemplate, + : TPC_Other, SkipBody)) Invalid = true; @@ -2208,9 +2219,8 @@ static bool DiagnoseDefaultTemplateArgument(Sema &S, SourceLocation ParamLoc, SourceRange DefArgRange) { switch (TPC) { - case Sema::TPC_ClassTemplate: - case Sema::TPC_VarTemplate: - case Sema::TPC_TypeAliasTemplate: + case Sema::TPC_Other: + case Sema::TPC_TemplateTemplateParameterPack: return false; case Sema::TPC_FunctionTemplate: @@ -2383,8 +2393,11 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, MissingDefaultArg = true; } else if (NonTypeTemplateParmDecl *NewNonTypeParm = dyn_cast(*NewParam)) { - // Check for unexpanded parameter packs. - if (!NewNonTypeParm->isParameterPack() && + // Check for unexpanded parameter packs, except in a template template + // parameter pack, as in those any unexpanded packs should be expanded + // along with the parameter itself. 
+ if (TPC != TPC_TemplateTemplateParameterPack && + !NewNonTypeParm->isParameterPack() && DiagnoseUnexpandedParameterPack(NewNonTypeParm->getLocation(), NewNonTypeParm->getTypeSourceInfo(), UPPC_NonTypeTemplateParameterType)) { @@ -2492,8 +2505,7 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, // If a template parameter of a primary class template or alias template // is a template parameter pack, it shall be the last template parameter. if (SawParameterPack && (NewParam + 1) != NewParamEnd && - (TPC == TPC_ClassTemplate || TPC == TPC_VarTemplate || - TPC == TPC_TypeAliasTemplate)) { + (TPC == TPC_Other || TPC == TPC_TemplateTemplateParameterPack)) { Diag((*NewParam)->getLocation(), diag::err_template_param_pack_must_be_last_template_parameter); Invalid = true; @@ -2526,8 +2538,8 @@ bool Sema::CheckTemplateParameterList(TemplateParameterList *NewParams, << PrevModuleName; Invalid = true; } else if (MissingDefaultArg && - (TPC == TPC_ClassTemplate || TPC == TPC_FriendClassTemplate || - TPC == TPC_VarTemplate || TPC == TPC_TypeAliasTemplate)) { + (TPC == TPC_Other || TPC == TPC_TemplateTemplateParameterPack || + TPC == TPC_FriendClassTemplate)) { // C++ 23[temp.param]p14: // If a template-parameter of a class template, variable template, or // alias template has a default template argument, each subsequent diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 5304b5a2155b4..7a880505a53ff 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -3427,9 +3427,9 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( if (!P.isPackExpansion() && !A.isPackExpansion()) { Info.Param = makeTemplateParameter(Template->getTemplateParameters()->getParam( - (PsStack.empty() ? TemplateArgs.end() - : PsStack.front().begin()) - - TemplateArgs.begin())); + (AsStack.empty() ? 
CTAI.CanonicalConverted.end() + : AsStack.front().begin()) - + 1 - CTAI.CanonicalConverted.begin())); Info.FirstArg = P; Info.SecondArg = A; return TemplateDeductionResult::NonDeducedMismatch; @@ -6625,17 +6625,19 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( TemplateDeductionResult TDK; runWithSufficientStackSpace(Info.getLocation(), [&] { - TDK = ::FinishTemplateArgumentDeduction( - *this, AArg, /*IsPartialOrdering=*/true, PArgs, Deduced, Info); + TDK = ::FinishTemplateArgumentDeduction(*this, AArg, PartialOrdering, PArgs, + Deduced, Info); }); switch (TDK) { case TemplateDeductionResult::Success: return true; // It doesn't seem possible to get a non-deduced mismatch when partial - // ordering TTPs. + // ordering TTPs, except with an invalid template parameter list which has + // a parameter after a pack. case TemplateDeductionResult::NonDeducedMismatch: - llvm_unreachable("Unexpected NonDeducedMismatch"); + assert(PArg->isInvalidDecl() && "Unexpected NonDeducedMismatch"); + return false; // Substitution failures should have already been diagnosed. case TemplateDeductionResult::AlreadyDiagnosed: diff --git a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp index b424de9c8a945..6728857edc6d8 100644 --- a/clang/lib/Sema/SemaTemplateDeductionGuide.cpp +++ b/clang/lib/Sema/SemaTemplateDeductionGuide.cpp @@ -377,12 +377,10 @@ struct ConvertConstructorToDeductionGuideTransform { if (NestedPattern) Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); auto [Depth, Index] = getDepthAndIndex(Param); - // Depth can still be 0 if FTD belongs to an explicit class template - // specialization with an empty template parameter list. In that case, - // we don't want the NewDepth to overflow, and it should remain 0. 
- assert(Depth || - cast(FTD->getDeclContext()) - ->isExplicitSpecialization()); + // Depth can be 0 if FTD belongs to a non-template class/a class + // template specialization with an empty template parameter list. In + // that case, we don't want the NewDepth to overflow, and it should + // remain 0. NamedDecl *NewParam = transformTemplateParameter( SemaRef, DC, Param, Args, Index + Depth1IndexAdjustment, Depth ? Depth - 1 : 0); @@ -989,6 +987,19 @@ getRHSTemplateDeclAndArgs(Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate) { return {Template, AliasRhsTemplateArgs}; } +bool IsNonDeducedArgument(const TemplateArgument &TA) { + // The following cases indicate the template argument is non-deducible: + // 1. The result is null. E.g. When it comes from a default template + // argument that doesn't appear in the alias declaration. + // 2. The template parameter is a pack and that cannot be deduced from + // the arguments within the alias declaration. + // Non-deducible template parameters will persist in the transformed + // deduction guide. + return TA.isNull() || + (TA.getKind() == TemplateArgument::Pack && + llvm::any_of(TA.pack_elements(), IsNonDeducedArgument)); +} + // Build deduction guides for a type alias template from the given underlying // deduction guide F. FunctionTemplateDecl * @@ -1057,7 +1068,8 @@ BuildDeductionGuideForTypeAlias(Sema &SemaRef, // !!NOTE: DeduceResults respects the sequence of template parameters of // the deduction guide f. 
for (unsigned Index = 0; Index < DeduceResults.size(); ++Index) { - if (const auto &D = DeduceResults[Index]; !D.isNull()) // Deduced + const auto &D = DeduceResults[Index]; + if (!IsNonDeducedArgument(D)) DeducedArgs.push_back(D); else NonDeducedTemplateParamsInFIndex.push_back(Index); @@ -1121,7 +1133,7 @@ BuildDeductionGuideForTypeAlias(Sema &SemaRef, Args.addOuterTemplateArguments(TransformedDeducedAliasArgs); for (unsigned Index = 0; Index < DeduceResults.size(); ++Index) { const auto &D = DeduceResults[Index]; - if (D.isNull()) { + if (IsNonDeducedArgument(D)) { // 2): Non-deduced template parameters would be substituted later. continue; } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index cf29d8a101b43..73567f3be814d 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1347,6 +1347,16 @@ std::optional Sema::isSFINAEContext() const { return std::nullopt; } +static TemplateArgument +getPackSubstitutedTemplateArgument(Sema &S, TemplateArgument Arg) { + assert(S.ArgumentPackSubstitutionIndex >= 0); + assert(S.ArgumentPackSubstitutionIndex < (int)Arg.pack_size()); + Arg = Arg.pack_begin()[S.ArgumentPackSubstitutionIndex]; + if (Arg.isPackExpansion()) + Arg = Arg.getPackExpansionPattern(); + return Arg; +} + //===----------------------------------------------------------------------===/ // Template Instantiation for Types //===----------------------------------------------------------------------===/ @@ -1466,11 +1476,13 @@ namespace { } } - static TemplateArgument + TemplateArgument getTemplateArgumentPackPatternForRewrite(const TemplateArgument &TA) { if (TA.getKind() != TemplateArgument::Pack) return TA; - assert(TA.pack_size() == 1 && + if (SemaRef.ArgumentPackSubstitutionIndex != -1) + return getPackSubstitutedTemplateArgument(SemaRef, TA); + assert(TA.pack_size() == 1 && TA.pack_begin()->isPackExpansion() && "unexpected pack arguments in template 
rewrite"); TemplateArgument Arg = *TA.pack_begin(); if (Arg.isPackExpansion()) @@ -1629,6 +1641,9 @@ namespace { std::vector TArgs; switch (Arg.getKind()) { case TemplateArgument::Pack: + assert(SemaRef.CodeSynthesisContexts.empty() || + SemaRef.CodeSynthesisContexts.back().Kind == + Sema::CodeSynthesisContext::BuildingDeductionGuides); // Literally rewrite the template argument pack, instead of unpacking // it. for (auto &pack : Arg.getPackAsArray()) { @@ -1649,6 +1664,23 @@ namespace { return inherited::TransformTemplateArgument(Input, Output, Uneval); } + std::optional ComputeSizeOfPackExprWithoutSubstitution( + ArrayRef PackArgs) { + // Don't do this when rewriting template parameters for CTAD: + // 1) The heuristic needs the unpacked Subst* nodes to figure out the + // expanded size, but this never applies since Subst* nodes are not + // created in rewrite scenarios. + // + // 2) The heuristic substitutes into the pattern with pack expansion + // suppressed, which does not meet the requirements for argument + // rewriting when template arguments include a non-pack matching against + // a pack, particularly when rewriting an alias CTAD. 
+ if (TemplateArgs.isRewrite()) + return std::nullopt; + + return inherited::ComputeSizeOfPackExprWithoutSubstitution(PackArgs); + } + template QualType TransformFunctionProtoType(TypeLocBuilder &TLB, FunctionProtoTypeLoc TL, @@ -1867,16 +1899,6 @@ bool TemplateInstantiator::AlreadyTransformed(QualType T) { return true; } -static TemplateArgument -getPackSubstitutedTemplateArgument(Sema &S, TemplateArgument Arg) { - assert(S.ArgumentPackSubstitutionIndex >= 0); - assert(S.ArgumentPackSubstitutionIndex < (int)Arg.pack_size()); - Arg = Arg.pack_begin()[S.ArgumentPackSubstitutionIndex]; - if (Arg.isPackExpansion()) - Arg = Arg.getPackExpansionPattern(); - return Arg; -} - Decl *TemplateInstantiator::TransformDecl(SourceLocation Loc, Decl *D) { if (!D) return nullptr; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 89ad2a0a9b7bb..0c25b87439a95 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1827,7 +1827,7 @@ Decl *TemplateDeclInstantiator::VisitClassTemplateDecl(ClassTemplateDecl *D) { // Do some additional validation, then merge default arguments // from the existing declarations. if (SemaRef.CheckTemplateParameterList(InstParams, PrevParams, - Sema::TPC_ClassTemplate)) + Sema::TPC_Other)) return nullptr; Inst->setAccess(PrevClassTemplate->getAccess()); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 2a5e354ff716a..3e8f0ec485e9b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -3660,6 +3660,9 @@ class TreeTransform { return SemaRef.BuildCXXNoexceptExpr(Range.getBegin(), Arg, Range.getEnd()); } + std::optional + ComputeSizeOfPackExprWithoutSubstitution(ArrayRef PackArgs); + /// Build a new expression to compute the length of a parameter pack. 
ExprResult RebuildSizeOfPackExpr(SourceLocation OperatorLoc, NamedDecl *Pack, SourceLocation PackLoc, @@ -15877,6 +15880,49 @@ TreeTransform::TransformPackExpansionExpr(PackExpansionExpr *E) { E->getNumExpansions()); } +template +std::optional +TreeTransform::ComputeSizeOfPackExprWithoutSubstitution( + ArrayRef PackArgs) { + std::optional Result = 0; + for (const TemplateArgument &Arg : PackArgs) { + if (!Arg.isPackExpansion()) { + Result = *Result + 1; + continue; + } + + TemplateArgumentLoc ArgLoc; + InventTemplateArgumentLoc(Arg, ArgLoc); + + // Find the pattern of the pack expansion. + SourceLocation Ellipsis; + std::optional OrigNumExpansions; + TemplateArgumentLoc Pattern = + getSema().getTemplateArgumentPackExpansionPattern(ArgLoc, Ellipsis, + OrigNumExpansions); + + // Substitute under the pack expansion. Do not expand the pack (yet). + TemplateArgumentLoc OutPattern; + Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1); + if (getDerived().TransformTemplateArgument(Pattern, OutPattern, + /*Uneval*/ true)) + return true; + + // See if we can determine the number of arguments from the result. + std::optional NumExpansions = + getSema().getFullyPackExpandedSize(OutPattern.getArgument()); + if (!NumExpansions) { + // No: we must be in an alias template expansion, and we're going to + // need to actually expand the packs. + Result = std::nullopt; + break; + } + + Result = *Result + *NumExpansions; + } + return Result; +} + template ExprResult TreeTransform::TransformSizeOfPackExpr(SizeOfPackExpr *E) { @@ -15942,42 +15988,8 @@ TreeTransform::TransformSizeOfPackExpr(SizeOfPackExpr *E) { } // Try to compute the result without performing a partial substitution. - std::optional Result = 0; - for (const TemplateArgument &Arg : PackArgs) { - if (!Arg.isPackExpansion()) { - Result = *Result + 1; - continue; - } - - TemplateArgumentLoc ArgLoc; - InventTemplateArgumentLoc(Arg, ArgLoc); - - // Find the pattern of the pack expansion. 
- SourceLocation Ellipsis; - std::optional OrigNumExpansions; - TemplateArgumentLoc Pattern = - getSema().getTemplateArgumentPackExpansionPattern(ArgLoc, Ellipsis, - OrigNumExpansions); - - // Substitute under the pack expansion. Do not expand the pack (yet). - TemplateArgumentLoc OutPattern; - Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(getSema(), -1); - if (getDerived().TransformTemplateArgument(Pattern, OutPattern, - /*Uneval*/ true)) - return true; - - // See if we can determine the number of arguments from the result. - std::optional NumExpansions = - getSema().getFullyPackExpandedSize(OutPattern.getArgument()); - if (!NumExpansions) { - // No: we must be in an alias template expansion, and we're going to need - // to actually expand the packs. - Result = std::nullopt; - break; - } - - Result = *Result + *NumExpansions; - } + std::optional Result = + getDerived().ComputeSizeOfPackExprWithoutSubstitution(PackArgs); // Common case: we could determine the number of expansions without // substituting. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f524251c48ddd..427b3c82c4737 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -9616,9 +9616,9 @@ ModuleFile *ASTReader::getLocalModuleFile(ModuleFile &M, unsigned ID) const { return I == GlobalSubmoduleMap.end() ? nullptr : I->second; } else { // It's a prefix (preamble, PCH, ...). Look it up by index. 
- unsigned IndexFromEnd = ID >> 1; + int IndexFromEnd = static_cast(ID >> 1); assert(IndexFromEnd && "got reference to unknown module file"); - return getModuleManager().pch_modules().end()[-IndexFromEnd]; + return getModuleManager().pch_modules().end()[-static_cast(IndexFromEnd)]; } } @@ -9636,7 +9636,7 @@ unsigned ASTReader::getModuleFileID(ModuleFile *M) { auto PCHModules = getModuleManager().pch_modules(); auto I = llvm::find(PCHModules, M); assert(I != PCHModules.end() && "emitting reference to unknown file"); - return (I - PCHModules.end()) << 1; + return std::distance(I, PCHModules.end()) << 1; } std::optional ASTReader::getSourceDescriptor(unsigned ID) { diff --git a/clang/lib/Serialization/TemplateArgumentHasher.cpp b/clang/lib/Serialization/TemplateArgumentHasher.cpp index 598f098f526d0..5fd6941256fe2 100644 --- a/clang/lib/Serialization/TemplateArgumentHasher.cpp +++ b/clang/lib/Serialization/TemplateArgumentHasher.cpp @@ -65,7 +65,9 @@ void TemplateArgumentHasher::AddTemplateArgument(TemplateArgument TA) { switch (Kind) { case TemplateArgument::Null: - llvm_unreachable("Expected valid TemplateArgument"); + // These can occur in incomplete substitutions performed with code + // completion (see PartialOverloading). 
+ break; case TemplateArgument::Type: AddQualType(TA.getAsType()); break; diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp index cfdd3c9faa360..bcc4ca77f5887 100644 --- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp @@ -97,10 +97,14 @@ class BuiltinFunctionChecker : public Checker { void handleOverflowBuiltin(const CallEvent &Call, CheckerContext &C, BinaryOperator::Opcode Op, QualType ResultType) const; - const NoteTag *createBuiltinNoOverflowNoteTag(CheckerContext &C, - bool BothFeasible, SVal Arg1, - SVal Arg2, SVal Result) const; - const NoteTag *createBuiltinOverflowNoteTag(CheckerContext &C) const; + const NoteTag *createBuiltinOverflowNoteTag(CheckerContext &C, + bool BothFeasible, SVal Arg1, + SVal Arg2, SVal Result) const; + ProgramStateRef initStateAftetBuiltinOverflow(CheckerContext &C, + ProgramStateRef State, + const CallEvent &Call, + SVal RetCal, + bool IsOverflow) const; std::pair checkOverflow(CheckerContext &C, SVal RetVal, QualType Res) const; @@ -122,30 +126,24 @@ class BuiltinFunctionChecker : public Checker { } // namespace -const NoteTag *BuiltinFunctionChecker::createBuiltinNoOverflowNoteTag( - CheckerContext &C, bool BothFeasible, SVal Arg1, SVal Arg2, - SVal Result) const { - return C.getNoteTag([Result, Arg1, Arg2, BothFeasible]( - PathSensitiveBugReport &BR, llvm::raw_ostream &OS) { +const NoteTag *BuiltinFunctionChecker::createBuiltinOverflowNoteTag( + CheckerContext &C, bool overflow, SVal Arg1, SVal Arg2, SVal Result) const { + return C.getNoteTag([Result, Arg1, Arg2, overflow](PathSensitiveBugReport &BR, + llvm::raw_ostream &OS) { if (!BR.isInteresting(Result)) return; - // Propagate interestingness to input argumets if result is interesting. + // Propagate interestingness to input arguments if result is interesting. 
BR.markInteresting(Arg1); BR.markInteresting(Arg2); - if (BothFeasible) + if (overflow) + OS << "Assuming overflow"; + else OS << "Assuming no overflow"; }); } -const NoteTag * -BuiltinFunctionChecker::createBuiltinOverflowNoteTag(CheckerContext &C) const { - return C.getNoteTag([](PathSensitiveBugReport &BR, - llvm::raw_ostream &OS) { OS << "Assuming overflow"; }, - /*isPrunable=*/true); -} - std::pair BuiltinFunctionChecker::checkOverflow(CheckerContext &C, SVal RetVal, QualType Res) const { @@ -175,6 +173,29 @@ BuiltinFunctionChecker::checkOverflow(CheckerContext &C, SVal RetVal, return {MayOverflow || MayUnderflow, MayNotOverflow && MayNotUnderflow}; } +ProgramStateRef BuiltinFunctionChecker::initStateAftetBuiltinOverflow( + CheckerContext &C, ProgramStateRef State, const CallEvent &Call, + SVal RetVal, bool IsOverflow) const { + SValBuilder &SVB = C.getSValBuilder(); + SVal Arg1 = Call.getArgSVal(0); + SVal Arg2 = Call.getArgSVal(1); + auto BoolTy = C.getASTContext().BoolTy; + + ProgramStateRef NewState = + State->BindExpr(Call.getOriginExpr(), C.getLocationContext(), + SVB.makeTruthVal(IsOverflow, BoolTy)); + + if (auto L = Call.getArgSVal(2).getAs()) { + NewState = NewState->bindLoc(*L, RetVal, C.getLocationContext()); + + // Propagate taint if any of the arguments were tainted + if (isTainted(State, Arg1) || isTainted(State, Arg2)) + NewState = addTaint(NewState, *L); + } + + return NewState; +} + void BuiltinFunctionChecker::handleOverflowBuiltin(const CallEvent &Call, CheckerContext &C, BinaryOperator::Opcode Op, @@ -184,8 +205,6 @@ void BuiltinFunctionChecker::handleOverflowBuiltin(const CallEvent &Call, ProgramStateRef State = C.getState(); SValBuilder &SVB = C.getSValBuilder(); - const Expr *CE = Call.getOriginExpr(); - auto BoolTy = C.getASTContext().BoolTy; SVal Arg1 = Call.getArgSVal(0); SVal Arg2 = Call.getArgSVal(1); @@ -195,29 +214,20 @@ void BuiltinFunctionChecker::handleOverflowBuiltin(const CallEvent &Call, SVal RetVal = SVB.evalBinOp(State, 
Op, Arg1, Arg2, ResultType); auto [Overflow, NotOverflow] = checkOverflow(C, RetValMax, ResultType); - if (NotOverflow) { - ProgramStateRef StateNoOverflow = State->BindExpr( - CE, C.getLocationContext(), SVB.makeTruthVal(false, BoolTy)); - - if (auto L = Call.getArgSVal(2).getAs()) { - StateNoOverflow = - StateNoOverflow->bindLoc(*L, RetVal, C.getLocationContext()); - // Propagate taint if any of the argumets were tainted - if (isTainted(State, Arg1) || isTainted(State, Arg2)) - StateNoOverflow = addTaint(StateNoOverflow, *L); - } + if (NotOverflow) { + auto NewState = + initStateAftetBuiltinOverflow(C, State, Call, RetVal, false); - C.addTransition( - StateNoOverflow, - createBuiltinNoOverflowNoteTag( - C, /*BothFeasible=*/NotOverflow && Overflow, Arg1, Arg2, RetVal)); + C.addTransition(NewState, createBuiltinOverflowNoteTag( + C, /*overflow=*/false, Arg1, Arg2, RetVal)); } if (Overflow) { - C.addTransition(State->BindExpr(CE, C.getLocationContext(), - SVB.makeTruthVal(true, BoolTy)), - createBuiltinOverflowNoteTag(C)); + auto NewState = initStateAftetBuiltinOverflow(C, State, Call, RetVal, true); + + C.addTransition(NewState, createBuiltinOverflowNoteTag(C, /*overflow=*/true, + Arg1, Arg2, RetVal)); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp index a0bf776b11f53..e58329817d7cd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp @@ -379,9 +379,9 @@ void DynamicTypePropagation::checkPostCall(const CallEvent &Call, // aggregates, and in such case no top-frame constructor will be called. // Figure out if we need to do anything in this case. // FIXME: Instead of relying on the ParentMap, we should have the - // trigger-statement (InitListExpr in this case) available in this - // callback, ideally as part of CallEvent. 
- if (isa_and_nonnull( + // trigger-statement (InitListExpr or CXXParenListInitExpr in this case) + // available in this callback, ideally as part of CallEvent. + if (isa_and_nonnull( LCtx->getParentMap().getParent(Ctor->getOriginExpr()))) return; diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 140c77790496d..cfb8be2e7f0f8 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -2510,6 +2510,20 @@ bool ExprEngine::replayWithoutInlining(ExplodedNode *N, return true; } +/// Return the innermost location context which is inlined at `Node`, unless +/// it's the top-level (entry point) location context. +static const LocationContext *getInlinedLocationContext(ExplodedNode *Node, + ExplodedGraph &G) { + const LocationContext *CalleeLC = Node->getLocation().getLocationContext(); + const LocationContext *RootLC = + (*G.roots_begin())->getLocation().getLocationContext(); + + if (CalleeLC->getStackFrame() == RootLC->getStackFrame()) + return nullptr; + + return CalleeLC; +} + /// Block entrance. (Update counters). void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, NodeBuilderWithSinks &nodeBuilder, @@ -2557,21 +2571,24 @@ void ExprEngine::processCFGBlockEntrance(const BlockEdge &L, const ExplodedNode *Sink = nodeBuilder.generateSink(Pred->getState(), Pred, &tag); - // Check if we stopped at the top level function or not. - // Root node should have the location context of the top most function. 
- const LocationContext *CalleeLC = Pred->getLocation().getLocationContext(); - const LocationContext *CalleeSF = CalleeLC->getStackFrame(); - const LocationContext *RootLC = - (*G.roots_begin())->getLocation().getLocationContext(); - if (RootLC->getStackFrame() != CalleeSF) { - Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl()); + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + // FIXME: This will unconditionally prevent inlining this function (even + // from other entry points), which is not a reasonable heuristic: even if + // we reached max block count on this particular execution path, there + // may be other execution paths (especially with other parametrizations) + // where the analyzer can reach the end of the function (so there is no + // natural reason to avoid inlining it). However, disabling this would + // significantly increase the analysis time (because more entry points + // would exhaust their allocated budget), so it must be compensated by a + // different (more reasonable) reduction of analysis scope. + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); // Re-run the call evaluation without inlining it, by storing the // no-inlining policy in the state and enqueuing the new work item on // the list. Replay should almost never fail. Use the stats to catch it // if it does. - if ((!AMgr.options.NoRetryExhausted && - replayWithoutInlining(Pred, CalleeLC))) + if ((!AMgr.options.NoRetryExhausted && replayWithoutInlining(Pred, LC))) return; NumMaxBlockCountReachedInInlined++; } else @@ -2835,8 +2852,29 @@ void ExprEngine::processBranch( // conflicts with the widen-loop analysis option (which is off by // default). If we intend to support and stabilize the loop widening, // we must ensure that it 'plays nicely' with this logic. 
- if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) + if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops) { Builder.generateNode(StTrue, true, PredN); + } else if (!AMgr.options.InlineFunctionsWithAmbiguousLoops) { + // FIXME: There is an ancient and arbitrary heuristic in + // `ExprEngine::processCFGBlockEntrance` which prevents all further + // inlining of a function if it finds an execution path within that + // function which reaches the `MaxBlockVisitOnPath` limit (a/k/a + // `analyzer-max-loop`, by default four iterations in a loop). Adding + // this "don't assume third iteration" logic significantly increased + // the analysis runtime on some inputs because less functions were + // arbitrarily excluded from being inlined, so more entry points used + // up their full allocated budget. As a hacky compensation for this, + // here we apply the "should not inline" mark in cases when the loop + // could potentially reach the `MaxBlockVisitOnPath` limit without the + // "don't assume third iteration" logic. This slightly overcompensates + // (activates if the third iteration can be entered, and will not + // recognize cases where the fourth iteration would't be completed), but + // should be good enough for practical purposes. + if (const LocationContext *LC = getInlinedLocationContext(Pred, G)) { + Engine.FunctionSummaries->markShouldNotInline( + LC->getStackFrame()->getDecl()); + } + } } if (StFalse) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index f7020da2e6da2..30839a40389ba 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -637,9 +637,10 @@ void ExprEngine::handleConstructor(const Expr *E, // FIXME: For now this code essentially bails out. We need to find the // correct target region and set it. 
// FIXME: Instead of relying on the ParentMap, we should have the - // trigger-statement (InitListExpr in this case) passed down from CFG or - // otherwise always available during construction. - if (isa_and_nonnull(LCtx->getParentMap().getParent(E))) { + // trigger-statement (InitListExpr or CXXParenListInitExpr in this case) + // passed down from CFG or otherwise always available during construction. + if (isa_and_nonnull( + LCtx->getParentMap().getParent(E))) { MemRegionManager &MRMgr = getSValBuilder().getRegionManager(); Target = loc::MemRegionVal(MRMgr.getCXXTempObjectRegion(E, LCtx)); CallOpts.IsCtorOrDtorWithImproperlyModeledTargetRegion = true; @@ -1010,7 +1011,8 @@ void ExprEngine::VisitCXXNewExpr(const CXXNewExpr *CNE, ExplodedNode *Pred, // values are properly placed inside the required region, however if an // initializer list is used, this doesn't happen automatically. auto *Init = CNE->getInitializer(); - bool isInitList = isa_and_nonnull(Init); + bool isInitList = + isa_and_nonnull(Init); QualType ObjTy = isInitList ? Init->getType() : CNE->getType()->getPointeeType(); diff --git a/clang/test/Analysis/PR135665.cpp b/clang/test/Analysis/PR135665.cpp new file mode 100644 index 0000000000000..124b8c9b97b04 --- /dev/null +++ b/clang/test/Analysis/PR135665.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_analyze_cc1 -std=c++20 -analyzer-checker=core -verify %s + +// expected-no-diagnostics + +template +struct overload : public F... +{ + using F::operator()...; +}; + +template +overload(F&&...) 
-> overload; + +int main() +{ + const auto l = overload([](const int* i) {}); + + return 0; +} diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index d5eb790b82f23..b47ca59e79827 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -88,6 +88,7 @@ // CHECK-NEXT: graph-trim-interval = 1000 // CHECK-NEXT: ignore-bison-generated-files = true // CHECK-NEXT: ignore-flex-generated-files = true +// CHECK-NEXT: inline-functions-with-ambiguous-loops = false // CHECK-NEXT: inline-lambdas = true // CHECK-NEXT: ipa = dynamic-bifurcate // CHECK-NEXT: ipa-always-inline-size = 3 diff --git a/clang/test/Analysis/builtin_overflow.c b/clang/test/Analysis/builtin_overflow.c index 9d98ce7a1af45..d290333071dc9 100644 --- a/clang/test/Analysis/builtin_overflow.c +++ b/clang/test/Analysis/builtin_overflow.c @@ -26,7 +26,7 @@ void test_add_overflow(void) int res; if (__builtin_add_overflow(__INT_MAX__, 1, &res)) { - clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}} + clang_analyzer_dump_int(res); //expected-warning{{-2147483648 S32b}} return; } @@ -38,7 +38,7 @@ void test_add_underoverflow(void) int res; if (__builtin_add_overflow(__INT_MIN__, -1, &res)) { - clang_analyzer_dump_int(res); //expected-warning{{1st function call argument is an uninitialized value}} + clang_analyzer_dump_int(res); //expected-warning{{2147483647 S32b}} return; } @@ -160,7 +160,7 @@ void test_bool_assign(void) { int res; - // Reproduce issue from GH#111147. __builtin_*_overflow funcions + // Reproduce issue from GH#111147. __builtin_*_overflow functions // should return _Bool, but not int. 
_Bool ret = __builtin_mul_overflow(10, 20, &res); // no crash } diff --git a/clang/test/Analysis/builtin_overflow_notes.c b/clang/test/Analysis/builtin_overflow_notes.c index 20f333a4a6cca..94c79b5ed334a 100644 --- a/clang/test/Analysis/builtin_overflow_notes.c +++ b/clang/test/Analysis/builtin_overflow_notes.c @@ -19,12 +19,16 @@ void test_no_overflow_note(int a, int b) void test_overflow_note(int a, int b) { - int res; // expected-note{{'res' declared without an initial value}} + int res; if (__builtin_add_overflow(a, b, &res)) { // expected-note {{Assuming overflow}} // expected-note@-1 {{Taking true branch}} - int var = res; // expected-warning{{Assigned value is garbage or undefined}} - // expected-note@-1 {{Assigned value is garbage or undefined}} + if (res) { // expected-note {{Assuming 'res' is not equal to 0}} + // expected-note@-1 {{Taking true branch}} + int *ptr = 0; // expected-note {{'ptr' initialized to a null pointer value}} + int var = *(int *) ptr; //expected-warning {{Dereference of null pointer}} + //expected-note@-1 {{Dereference of null pointer}} + } return; } } diff --git a/clang/test/Analysis/live-stmts.cpp b/clang/test/Analysis/live-stmts.cpp index c60f522588e39..ca2ff6da8b133 100644 --- a/clang/test/Analysis/live-stmts.cpp +++ b/clang/test/Analysis/live-stmts.cpp @@ -44,6 +44,8 @@ int testThatDumperWorks(int x, int y, int z) { // CHECK-NEXT: ImplicitCastExpr {{.*}} // CHECK-NEXT: `-ImplicitCastExpr {{.*}} // CHECK-NEXT: `-DeclRefExpr {{.*}} 'x' 'int' +// CHECK-EMPTY: +// CHECK-EMPTY: // CHECK: [ B4 (live expressions at block exit) ] // CHECK-EMPTY: // CHECK-NEXT: DeclRefExpr {{.*}} 'y' 'int' diff --git a/clang/test/Analysis/loop-based-inlining-prevention.c b/clang/test/Analysis/loop-based-inlining-prevention.c new file mode 100644 index 0000000000000..73627112e2d32 --- /dev/null +++ b/clang/test/Analysis/loop-based-inlining-prevention.c @@ -0,0 +1,200 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection 
-verify=expected,default %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config inline-functions-with-ambiguous-loops=true -verify=expected,enabled %s + +// This file tests some heuristics in the engine that put functions on a +// "do not inline" list if their analyisis reaches the `analyzer-max-loop` +// limit (by default 4 iterations) in a loop. This was almost surely intended +// as memoization optimization for the "retry without inlining" fallback (if we +// had to retry once, next time don't even try inlining), but aggressively +// oversteps the "natural" scope: reaching 4 iterations on _one particular_ +// execution path does not imply that each path would need "retry without +// inlining" especially if a different call receives different arguments. +// +// This heuristic significantly affects the scope/depth of the analysis (and +// therefore the execution time) because without this limitation on the +// inlining significantly more entry points would be able to exhaust their +// `max-nodes` quota. (Trivial thin wrappers around big complex functions are +// common in many projects.) +// +// Unfortunately, this arbitrary heuristic strongly relies on the current loop +// handling model and its many limitations, so improvements in loop handling +// can cause surprising slowdowns by reducing the "do not inline" blacklist. +// In the tests "FIXME-BUT-NEEDED" comments mark "problematic" (aka buggy) +// analyzer behavior which cannot be fixed without also improving the +// heuristics for (not) inlining large functions. + + int getNum(void); // Get an unknown symbolic number. + +void clang_analyzer_dump(int arg); + +//----------------------------------------------------------------------------- +// Simple case: inlined function never reaches `analyzer-max-loop`, so it is +// always inlined. 
+ +int inner_simple(int callIdx) { + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + // expected-warning@-1 {{2 S32}} + return 42; +} + +int outer_simple(void) { + int x = inner_simple(1); + int y = inner_simple(2); + return 53 / (x - y); // expected-warning {{Division by zero}} +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`, which stops the +// analysis on that path and puts the function on the "do not inline" list. + +int inner_fixed_loop_1(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{1 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_fixed_loop_1(void) { + int x = inner_fixed_loop_1(1); + int y = inner_fixed_loop_1(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function always reaches `analyzer-max-loop`; inlining is prevented +// even for different entry points. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_fixed_loop_2` is analyzed first and it will be the one which is +// able to inline the inner function. + +int inner_fixed_loop_2(int callIdx) { + // Identical copy of inner_fixed_loop_1. + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + for (i = 0; i < 10; i++); // FIXME-BUT-NEEDED: This stops the analysis. + clang_analyzer_dump(callIdx); // no-warning + return 42; +} + +int outer_1_fixed_loop_2(void) { + return inner_fixed_loop_2(1); +} + +int outer_2_fixed_loop_2(void) { + return inner_fixed_loop_2(2); +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` only in its second call. 
The +// function is inlined twice but the second call doesn't finish and ends up +// being conservatively evaluated. + +int inner_parametrized_loop_1(int count) { + int i; + clang_analyzer_dump(count); // expected-warning {{2 S32}} + // expected-warning@-1 {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // expected-warning {{2 S32}} + return 42; +} + +int outer_parametrized_loop_1(void) { + int x = inner_parametrized_loop_1(2); + int y = inner_parametrized_loop_1(10); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function reaches `analyzer-max-loop` on its first call, so the +// second call isn't inlined (although it could be fully evaluated). + +int inner_parametrized_loop_2(int count) { + // Identical copy of inner_parametrized_loop_1. + int i; + clang_analyzer_dump(count); // expected-warning {{10 S32}} + for (i = 0; i < count; i++); + // FIXME-BUT-NEEDED: This loop stops the analysis when count >=4. + clang_analyzer_dump(count); // no-warning + return 42; +} + +int outer_parametrized_loop_2(void) { + int y = inner_parametrized_loop_2(10); + int x = inner_parametrized_loop_2(2); + + // FIXME-BUT-NEEDED: The analysis doesn't reach this zero division. + return 53 / (x - y); // no-warning +} + +//----------------------------------------------------------------------------- +// Inlined function may or may not reach `analyzer-max-loop` depending on an +// ambiguous check before the loop. This is very similar to the "fixed loop" +// cases: the function is placed on the "don't inline" list when any execution +// path reaches `analyzer-max-loop` (even if other execution paths reach the +// end of the function). 
+// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. +// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_conditional_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_conditional_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + if (getNum() == 777) { + for (i = 0; i < 10; i++); + } + clang_analyzer_dump(callIdx); // expected-warning {{2 S32}} + return 42; +} + +int outer_1_conditional_loop(void) { + return inner_conditional_loop(1); +} + +int outer_2_conditional_loop(void) { + return inner_conditional_loop(2); +} + +//----------------------------------------------------------------------------- +// Inlined function executes an ambiguous loop that may or may not reach +// `analyzer-max-loop`. Historically, before the "don't assume third iteration" +// commit (bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849) this worked like the +// `conditional_loop` cases: the analyzer was able to find a path reaching +// `analyzer-max-loop` so inlining was disabled. After that commit the analyzer +// does not _assume_ a third (or later) iteration (i.e. does not enter those +// iterations if the loop condition is an unknown value), so e.g. this test +// function does not reach `analyzer-max-loop` iterations and the inlining is +// not disabled. +// Unfortunately this change significantly increased the workload and +// runtime of the analyzer (more entry points used up their budget), so the +// option `inline-functions-with-ambiguous-loops` was introduced and disabled +// by default to suppress the inlining in situations where the "don't assume +// third iteration" logic activates. +// NOTE: This is tested with two separate entry points to ensure that one +// inlined call is fully evaluated before we try to inline the other call. 
+// NOTE: the analyzer happens to analyze the entry points in a reversed order, +// so `outer_2_ambiguous_loop` is analyzed first and it will be the one which +// is able to inline the inner function. + +int inner_ambiguous_loop(int callIdx) { + int i; + clang_analyzer_dump(callIdx); // default-warning {{2 S32}} + // enabled-warning@-1 {{1 S32}} + // enabled-warning@-2 {{2 S32}} + for (i = 0; i < getNum(); i++); + return i; +} + +int outer_1_ambiguous_loop(void) { + return inner_ambiguous_loop(1); +} +int outer_2_ambiguous_loop(void) { + return inner_ambiguous_loop(2); +} diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp index bf05a7739ce48..ebae81e000c7a 100644 --- a/clang/test/Analysis/loop-unrolling.cpp +++ b/clang/test/Analysis/loop-unrolling.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s -// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify -std=c++14 -DDFS=1 %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true -verify=expected,default -std=c++14 -analyzer-config exploration_strategy=unexplored_first_queue %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -analyzer-config unroll-loops=true,cfg-loopexit=true,exploration_strategy=dfs -verify=expected,dfs -std=c++14 %s void clang_analyzer_numTimesReached(); void clang_analyzer_warnIfReached(); @@ -337,6 +337,7 @@ int nested_both_unrolled() { } int simple_known_bound_loop() { + // Iteration count visible: can be unrolled and fully executed. 
for (int i = 2; i < 12; i++) { // This function is inlined in nested_inlined_unroll1() clang_analyzer_numTimesReached(); // expected-warning {{90}} @@ -345,27 +346,42 @@ int simple_known_bound_loop() { } int simple_unknown_bound_loop() { + // Iteration count unknown: unrolling won't happen and the execution will be + // split two times: + // (1) split between skipped loop (immediate exit) and entering the loop + // (2) split between exit after 1 iteration and entering the second iteration + // After these there is no third state split because the "don't assume third + // iteration" logic in `ExprEngine::processBranch` prevents it; but the + // `legacy-inlining-prevention` logic will put this function onto the list of + // functions that may not be inlined in the future. + // The exploration strategy apparently influences the number of times this + // function can be inlined before it's placed on the "don't inline" list. for (int i = 2; i < getNum(); i++) { - clang_analyzer_numTimesReached(); // expected-warning {{8}} + clang_analyzer_numTimesReached(); // default-warning {{4}} dfs-warning {{8}} } return 0; } int nested_inlined_unroll1() { + // Here the analyzer can unroll and fully execute both the outer loop and the + // inner loop within simple_known_bound_loop(). int k; for (int i = 0; i < 9; i++) { clang_analyzer_numTimesReached(); // expected-warning {{9}} - k = simple_known_bound_loop(); // no reevaluation without inlining + k = simple_known_bound_loop(); } int a = 22 / k; // expected-warning {{Division by zero}} return 0; } int nested_inlined_no_unroll1() { + // Here no unrolling happens and we only run `analyzer-max-loop` (= 4) + // iterations of the loop within this function, but some state splits happen + // in `simple_unknown_bound_loop()` calls. 
int k; - for (int i = 0; i < 9; i++) { - clang_analyzer_numTimesReached(); // expected-warning {{10}} - k = simple_unknown_bound_loop(); // reevaluation without inlining, splits the state as well + for (int i = 0; i < 40; i++) { + clang_analyzer_numTimesReached(); // default-warning {{9}} dfs-warning {{12}} + k = simple_unknown_bound_loop(); } int a = 22 / k; // no-warning return 0; diff --git a/clang/test/CodeCompletion/GH139019.cpp b/clang/test/CodeCompletion/GH139019.cpp new file mode 100644 index 0000000000000..fed35b38362a1 --- /dev/null +++ b/clang/test/CodeCompletion/GH139019.cpp @@ -0,0 +1,26 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/test.hpp -emit-pch -o %t/1.pch +// RUN: %clang_cc1 -std=c++20 %t/test.cpp -include-pch %t/1.pch -code-completion-at=%t/test.cpp:7:17 + +//--- test.hpp +#pragma once +class provider_t +{ + public: + template + void emit(T *data) + {} +}; + +//--- test.cpp +#include "test.hpp" + +void test() +{ + provider_t *focus; + void *data; + focus->emit(&data); +} diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c index 5b4051c8d6f17..717a7d7ab49e2 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c @@ -1,6 +1,6 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm -x c++ %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm -x c++ %s -o - | FileCheck %s unsigned long test_bdepg(unsigned long a, unsigned long b) { // CHECK-LABEL: test_bdepg diff --git 
a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c index 3943a15af9d2f..8275b9ddb88a8 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-unknown-unknown \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-unknown-unknown \ // RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s typedef __attribute__((vector_size(16))) signed char vec_schar; diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c index c3621819e71f9..b765fa64b33d4 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -flax-vector-conversions=none \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-ibm-linux -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s typedef __attribute__((vector_size(16))) signed char vec_schar; diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c index 9f4844efd6312..79041b923068e 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c @@ -1,5 +1,5 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c 
b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c index 7a29dbf552e0b..6ee9e1ee3a117 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c @@ -1,8 +1,8 @@ // REQUIRES: systemz-registered-target -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -O2 -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s -// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \ +// RUN: %clang_cc1 -target-cpu z17 -triple s390x-linux-gnu \ // RUN: -O2 -fzvector -flax-vector-conversions=none \ // RUN: -Wall -Wno-unused -Werror -S %s -o - | FileCheck %s --check-prefix=CHECK-ASM diff --git a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c index 1e1926678ec33..e5704709a3a33 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c @@ -18,6 +18,8 @@ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu z17 \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ // RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s diff --git a/clang/test/CodeGen/SystemZ/systemz-abi.c b/clang/test/CodeGen/SystemZ/systemz-abi.c index 58081bdc6cc2a..7de425950e9fd 100644 --- a/clang/test/CodeGen/SystemZ/systemz-abi.c +++ b/clang/test/CodeGen/SystemZ/systemz-abi.c @@ -24,6 +24,8 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \ // RUN: -emit-llvm -o - %s 
-mfloat-abi soft | FileCheck %s \ // RUN: --check-prefixes=CHECK,SOFT-FLOAT +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu z17 \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD-FLOAT // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ // RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD-FLOAT // RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \ diff --git a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c index dcf7bbc005a7c..3ac7c2cc8716f 100644 --- a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c @@ -59,22 +59,22 @@ __m256i test_mm512_maskz_cvtbiasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) return _mm512_maskz_cvtbiasph_bf8(__U, __A, __B); } -__m256i test_mm512_cvtbiassph_bf8(__m512i __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_cvtbiassph_bf8( +__m256i test_mm512_cvts_biasph_bf8(__m512i __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_cvts_biasph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s512( - return _mm512_cvtbiassph_bf8(__A, __B); + return _mm512_cvts_biasph_bf8(__A, __B); } -__m256i test_mm512_mask_cvtbiassph_bf8(__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_cvtbiassph_bf8( +__m256i test_mm512_mask_cvts_biasph_bf8(__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_cvts_biasph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s512( - return _mm512_mask_cvtbiassph_bf8(__W, __U, __A, __B); + return _mm512_mask_cvts_biasph_bf8(__W, __U, __A, __B); } -__m256i test_mm512_maskz_cvtbiassph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_cvtbiassph_bf8( +__m256i test_mm512_maskz_cvts_biasph_bf8(__mmask32 __U, __m512i __A, 
__m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cvts_biasph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s512( - return _mm512_maskz_cvtbiassph_bf8(__U, __A, __B); + return _mm512_maskz_cvts_biasph_bf8(__U, __A, __B); } __m256i test_mm512_cvtbiasph_hf8(__m512i __A, __m512h __B) { @@ -95,22 +95,22 @@ __m256i test_mm512_maskz_cvtbiasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) return _mm512_maskz_cvtbiasph_hf8(__U, __A, __B); } -__m256i test_mm512_cvtbiassph_hf8(__m512i __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_cvtbiassph_hf8( +__m256i test_mm512_cvts_biasph_hf8(__m512i __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_cvts_biasph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s512( - return _mm512_cvtbiassph_hf8(__A, __B); + return _mm512_cvts_biasph_hf8(__A, __B); } -__m256i test_mm512_mask_cvtbiassph_hf8(__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_cvtbiassph_hf8( +__m256i test_mm512_mask_cvts_biasph_hf8(__m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_cvts_biasph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s512( - return _mm512_mask_cvtbiassph_hf8(__W, __U, __A, __B); + return _mm512_mask_cvts_biasph_hf8(__W, __U, __A, __B); } -__m256i test_mm512_maskz_cvtbiassph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_cvtbiassph_hf8( +__m256i test_mm512_maskz_cvts_biasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cvts_biasph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s512( - return _mm512_maskz_cvtbiassph_hf8(__U, __A, __B); + return _mm512_maskz_cvts_biasph_hf8(__U, __A, __B); } __m512i test_mm512_cvt2ph_bf8(__m512h __A, __m512h __B) { @@ -135,26 +135,26 @@ __m512i test_mm512_maskz_cvt2ph_bf8(__mmask32 __U, __m512h __A, __m512h __B) { return _mm512_maskz_cvt2ph_bf8(__U, __A, __B); } -__m512i 
test_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_cvts2ph_bf8( +__m512i test_mm512_cvts_2ph_bf8(__m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_cvts_2ph_bf8( // CHECK: call <64 x i8> @llvm.x86.avx10.vcvt2ph2bf8s512( - return _mm512_cvts2ph_bf8(__A, __B); + return _mm512_cvts_2ph_bf8(__A, __B); } -__m512i test_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_cvts2ph_bf8( +__m512i test_mm512_mask_cvts_2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_cvts_2ph_bf8( // CHECK: call <64 x i8> @llvm.x86.avx10.vcvt2ph2bf8s512( // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} // CHECK: ret <8 x i64> %{{.*}} - return _mm512_mask_cvts2ph_bf8(__W, __U, __A, __B); + return _mm512_mask_cvts_2ph_bf8(__W, __U, __A, __B); } -__m512i test_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_cvts2ph_bf8( +__m512i test_mm512_maskz_cvts_2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cvts_2ph_bf8( // CHECK: call <64 x i8> @llvm.x86.avx10.vcvt2ph2bf8s512( // CHECK: zeroinitializer // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} - return _mm512_maskz_cvts2ph_bf8(__U, __A, __B); + return _mm512_maskz_cvts_2ph_bf8(__U, __A, __B); } __m512i test_mm512_cvt2ph_hf8(__m512h __A, __m512h __B) { @@ -179,26 +179,26 @@ __m512i test_mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { return _mm512_maskz_cvt2ph_hf8(__U, __A, __B); } -__m512i test_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_cvts2ph_hf8( +__m512i test_mm512_cvts_2ph_hf8(__m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_cvts_2ph_hf8( // CHECK: call <64 x i8> @llvm.x86.avx10.vcvt2ph2hf8s512( - return _mm512_cvts2ph_hf8(__A, __B); + return _mm512_cvts_2ph_hf8(__A, __B); } -__m512i 
test_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_cvts2ph_hf8( +__m512i test_mm512_mask_cvts_2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_cvts_2ph_hf8( // CHECK: call <64 x i8> @llvm.x86.avx10.vcvt2ph2hf8s512( // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} // CHECK: ret <8 x i64> %{{.*}} - return _mm512_mask_cvts2ph_hf8(__W, __U, __A, __B); + return _mm512_mask_cvts_2ph_hf8(__W, __U, __A, __B); } -__m512i test_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_cvts2ph_hf8( +__m512i test_mm512_maskz_cvts_2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cvts_2ph_hf8( // CHECK: call <64 x i8> @llvm.x86.avx10.vcvt2ph2hf8s512( // CHECK: zeroinitializer // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} - return _mm512_maskz_cvts2ph_hf8(__U, __A, __B); + return _mm512_maskz_cvts_2ph_hf8(__U, __A, __B); } __m512h test_mm512_cvthf8_ph(__m256i __A) { @@ -237,22 +237,22 @@ __m256i test_mm512_maskz_cvtph_bf8(__mmask32 __A, __m512h __B) { return _mm512_maskz_cvtph_bf8(__A, __B); } -__m256i test_mm512_cvtsph_bf8(__m512h __A) { - // CHECK-LABEL: @test_mm512_cvtsph_bf8( +__m256i test_mm512_cvts_ph_bf8(__m512h __A) { + // CHECK-LABEL: @test_mm512_cvts_ph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s512( - return _mm512_cvtsph_bf8(__A); + return _mm512_cvts_ph_bf8(__A); } -__m256i test_mm512_mask_cvtsph_bf8(__m256i __A, __mmask32 __B, __m512h __C) { - // CHECK-LABEL: @test_mm512_mask_cvtsph_bf8( +__m256i test_mm512_mask_cvts_ph_bf8(__m256i __A, __mmask32 __B, __m512h __C) { + // CHECK-LABEL: @test_mm512_mask_cvts_ph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s512( - return _mm512_mask_cvtsph_bf8(__A, __B, __C); + return _mm512_mask_cvts_ph_bf8(__A, __B, __C); } -__m256i 
test_mm512_maskz_cvtsph_bf8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_cvtsph_bf8( +__m256i test_mm512_maskz_cvts_ph_bf8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cvts_ph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s512( - return _mm512_maskz_cvtsph_bf8(__A, __B); + return _mm512_maskz_cvts_ph_bf8(__A, __B); } __m256i test_mm512_cvtph_hf8(__m512h __A) { @@ -273,22 +273,22 @@ __m256i test_mm512_maskz_cvtph_hf8(__mmask32 __A, __m512h __B) { return _mm512_maskz_cvtph_hf8(__A, __B); } -__m256i test_mm512_cvtsph_hf8(__m512h __A) { - // CHECK-LABEL: @test_mm512_cvtsph_hf8( +__m256i test_mm512_cvts_ph_hf8(__m512h __A) { + // CHECK-LABEL: @test_mm512_cvts_ph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s512( - return _mm512_cvtsph_hf8(__A); + return _mm512_cvts_ph_hf8(__A); } -__m256i test_mm512_mask_cvtsph_hf8(__m256i __A, __mmask32 __B, __m512h __C) { - // CHECK-LABEL: @test_mm512_mask_cvtsph_hf8( +__m256i test_mm512_mask_cvts_ph_hf8(__m256i __A, __mmask32 __B, __m512h __C) { + // CHECK-LABEL: @test_mm512_mask_cvts_ph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s512( - return _mm512_mask_cvtsph_hf8(__A, __B, __C); + return _mm512_mask_cvts_ph_hf8(__A, __B, __C); } -__m256i test_mm512_maskz_cvtsph_hf8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_cvtsph_hf8( +__m256i test_mm512_maskz_cvts_ph_hf8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_cvts_ph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s512( - return _mm512_maskz_cvtsph_hf8(__A, __B); + return _mm512_maskz_cvts_ph_hf8(__A, __B); } __m512h test_mm512_cvtbf8_ph(__m256i A) { diff --git a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c index 6db7801eb0040..2ee496d317a5a 100644 --- a/clang/test/CodeGen/X86/avx10_2_512minmax-error.c +++ b/clang/test/CodeGen/X86/avx10_2_512minmax-error.c @@ -113,17 +113,6 @@ __m512 
test_mm512_minmax_round_ps(__m512 __A, __m512 __B) { return _mm512_minmax_round_ps(__A, __B, 127, 11); // expected-error {{invalid rounding argument}} } -__m256d test_mm256_minmax_round_pd(__m256d __A, __m256d __B) { - return _mm256_minmax_round_pd(__A, __B, 127, 11); // expected-error {{invalid rounding argument}} -} - -__m256h test_mm256_minmax_round_ph(__m256h __A, __m256h __B) { - return _mm256_minmax_round_ph(__A, __B, 127, 11); // expected-error {{invalid rounding argument}} -} - -__m256 test_mm256_minmax_round_ps(__m256 __A, __m256 __B) { - return _mm256_minmax_round_ps(__A, __B, 127, 11); // expected-error {{invalid rounding argument}} -} __m128d test_mm_minmax_round_sd(__m128d __A, __m128d __B) { return _mm_minmax_round_sd(__A, __B, 127, 11); // expected-error {{invalid rounding argument}} } diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c b/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c deleted file mode 100755 index 81bf59153e678..0000000000000 --- a/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c +++ /dev/null @@ -1,198 +0,0 @@ -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-512 \ -// RUN: -Wall -Werror -verify -// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-512 \ -// RUN: -Wall -Werror -verify - -#include - -__m512i test_mm512_ipcvt_roundph_epi8(__m512h __A) { - return _mm512_ipcvt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { - return _mm512_mask_ipcvt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvt_roundph_epi8(__mmask32 __A, __m512h __B) { - return _mm512_maskz_ipcvt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_ipcvt_roundph_epu8(__m512h __A) { - return 
_mm512_ipcvt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { - return _mm512_mask_ipcvt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvt_roundph_epu8(__mmask32 __A, __m512h __B) { - return _mm512_maskz_ipcvt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_ipcvt_roundps_epi8(__m512 __A) { - return _mm512_ipcvt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { - return _mm512_mask_ipcvt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvt_roundps_epi8(__mmask16 __A, __m512 __B) { - return _mm512_maskz_ipcvt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_ipcvt_roundps_epu8(__m512 __A) { - return _mm512_ipcvt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { - return _mm512_mask_ipcvt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvt_roundps_epu8(__mmask16 __A, __m512 __B) { - return _mm512_maskz_ipcvt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_ipcvtt_roundph_epi8(__m512h __A) { - return _mm512_ipcvtt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvtt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { - return _mm512_mask_ipcvtt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvtt_roundph_epi8(__mmask32 __A, __m512h __B) { - return 
_mm512_maskz_ipcvtt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_ipcvtt_roundph_epu8(__m512h __A) { - return _mm512_ipcvtt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvtt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { - return _mm512_mask_ipcvtt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvtt_roundph_epu8(__mmask32 __A, __m512h __B) { - return _mm512_maskz_ipcvtt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_ipcvtt_roundps_epi8(__m512 __A) { - return _mm512_ipcvtt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvtt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { - return _mm512_mask_ipcvtt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvtt_roundps_epi8(__mmask16 __A, __m512 __B) { - return _mm512_maskz_ipcvtt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_ipcvtt_roundps_epu8(__m512 __A) { - return _mm512_ipcvtt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_mask_ipcvtt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { - return _mm512_mask_ipcvtt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m512i test_mm512_maskz_ipcvtt_roundps_epu8(__mmask16 __A, __m512 __B) { - return _mm512_maskz_ipcvtt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvt_roundph_epi8(__m256h __A) { - return _mm256_ipcvt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { - return 
_mm256_mask_ipcvt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvt_roundph_epi8(__mmask16 __A, __m256h __B) { - return _mm256_maskz_ipcvt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvt_roundph_epu8(__m256h __A) { - return _mm256_ipcvt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) { - return _mm256_mask_ipcvt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvt_roundph_epu8(__mmask16 __A, __m256h __B) { - return _mm256_maskz_ipcvt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvt_roundps_epi8(__m256 __A) { - return _mm256_ipcvt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { - return _mm256_mask_ipcvt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvt_roundps_epi8(__mmask8 __A, __m256 __B) { - return _mm256_maskz_ipcvt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvt_roundps_epu8(__m256 __A) { - return _mm256_ipcvt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { - return _mm256_mask_ipcvt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvt_roundps_epu8(__mmask8 __A, __m256 __B) { - return _mm256_maskz_ipcvt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvtt_roundph_epi8(__m256h __A) { - return _mm256_ipcvtt_roundph_epi8(__A, 22); // 
expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvtt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { - return _mm256_mask_ipcvtt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvtt_roundph_epi8(__mmask16 __A, __m256h __B) { - return _mm256_maskz_ipcvtt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvtt_roundph_epu8(__m256h __A) { - return _mm256_ipcvtt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvtt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) { - return _mm256_mask_ipcvtt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvtt_roundph_epu8(__mmask16 __A, __m256h __B) { - return _mm256_maskz_ipcvtt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvtt_roundps_epi8(__m256 __A) { - return _mm256_ipcvtt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvtt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { - return _mm256_mask_ipcvtt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvtt_roundps_epi8(__mmask8 __A, __m256 __B) { - return _mm256_maskz_ipcvtt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_ipcvtt_roundps_epu8(__m256 __A) { - return _mm256_ipcvtt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_ipcvtt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { - return _mm256_mask_ipcvtt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_ipcvtt_roundps_epu8(__mmask8 __A, __m256 __B) { - return _mm256_maskz_ipcvtt_roundps_epu8(__A, __B, 
22); // expected-error {{invalid rounding argument}} -} diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c index 0d3b0c278b44a..0a1c329144398 100755 --- a/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c @@ -5,375 +5,375 @@ #include -__m512i test_mm512_ipcvtbf16_epi8(__m512bh __A) { - // CHECK-LABEL: @test_mm512_ipcvtbf16_epi8( +__m512i test_mm512_ipcvts_bf16_epi8(__m512bh __A) { + // CHECK-LABEL: @test_mm512_ipcvts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvtbf162ibs512 - return _mm512_ipcvtbf16_epi8(__A); + return _mm512_ipcvts_bf16_epi8(__A); } -__m512i test_mm512_mask_ipcvtbf16_epi8(__m512i __S, __mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtbf16_epi8( +__m512i test_mm512_mask_ipcvts_bf16_epi8(__m512i __S, __mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvtbf162ibs512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_mask_ipcvtbf16_epi8(__S, __A, __B); + return _mm512_mask_ipcvts_bf16_epi8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvtbf16_epi8(__mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtbf16_epi8 +__m512i test_mm512_maskz_ipcvts_bf16_epi8(__mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_bf16_epi8 // CHECK: @llvm.x86.avx10.vcvtbf162ibs512 // CHECK: zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_maskz_ipcvtbf16_epi8(__A, __B); + return _mm512_maskz_ipcvts_bf16_epi8(__A, __B); } -__m512i test_mm512_ipcvtbf16_epu8(__m512bh __A) { - // CHECK-LABEL: @test_mm512_ipcvtbf16_epu8( +__m512i test_mm512_ipcvts_bf16_epu8(__m512bh __A) { + // CHECK-LABEL: @test_mm512_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs512 - return _mm512_ipcvtbf16_epu8(__A); + return _mm512_ipcvts_bf16_epu8(__A); } -__m512i 
test_mm512_mask_ipcvtbf16_epu8(__m512i __S, __mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtbf16_epu8( +__m512i test_mm512_mask_ipcvts_bf16_epu8(__m512i __S, __mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs512 // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_mask_ipcvtbf16_epu8(__S, __A, __B); + return _mm512_mask_ipcvts_bf16_epu8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvtbf16_epu8(__mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtbf16_epu8 +__m512i test_mm512_maskz_ipcvts_bf16_epu8(__mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_bf16_epu8 // CHECK: @llvm.x86.avx10.vcvtbf162iubs512 // CHECK: zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_maskz_ipcvtbf16_epu8(__A, __B); + return _mm512_maskz_ipcvts_bf16_epu8(__A, __B); } -__m512i test_mm512_ipcvtph_epi8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvtph_epi8( +__m512i test_mm512_ipcvts_ph_epi8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs512 - return _mm512_ipcvtph_epi8(__A); + return _mm512_ipcvts_ph_epi8(__A); } -__m512i test_mm512_mask_ipcvtph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtph_epi8( +__m512i test_mm512_mask_ipcvts_ph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs512 - return _mm512_mask_ipcvtph_epi8(__S, __A, __B); + return _mm512_mask_ipcvts_ph_epi8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvtph_epi8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtph_epi8( +__m512i test_mm512_maskz_ipcvts_ph_epi8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs512 - 
return _mm512_maskz_ipcvtph_epi8(__A, __B); + return _mm512_maskz_ipcvts_ph_epi8(__A, __B); } -__m512i test_mm512_ipcvt_roundph_epi8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvt_roundph_epi8( +__m512i test_mm512_ipcvts_roundph_epi8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvts_roundph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs512 - return _mm512_ipcvt_roundph_epi8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_ipcvts_roundph_epi8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvt_roundph_epi8 +__m512i test_mm512_mask_ipcvts_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_roundph_epi8 // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs512 - return _mm512_mask_ipcvt_roundph_epi8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvts_roundph_epi8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvt_roundph_epi8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvt_roundph_epi8 +__m512i test_mm512_maskz_ipcvts_roundph_epi8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_roundph_epi8 // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs512 - return _mm512_maskz_ipcvt_roundph_epi8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvts_roundph_epi8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_ipcvtph_epu8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvtph_epu8( +__m512i test_mm512_ipcvts_ph_epu8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs512 - return _mm512_ipcvtph_epu8(__A); + return _mm512_ipcvts_ph_epu8(__A); } -__m512i test_mm512_mask_ipcvtph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtph_epu8( +__m512i 
test_mm512_mask_ipcvts_ph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs512 - return _mm512_mask_ipcvtph_epu8(__S, __A, __B); + return _mm512_mask_ipcvts_ph_epu8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvtph_epu8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtph_epu8( +__m512i test_mm512_maskz_ipcvts_ph_epu8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs512 - return _mm512_maskz_ipcvtph_epu8(__A, __B); + return _mm512_maskz_ipcvts_ph_epu8(__A, __B); } -__m512i test_mm512_ipcvt_roundph_epu8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvt_roundph_epu8( +__m512i test_mm512_ipcvts_roundph_epu8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvts_roundph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs512 - return _mm512_ipcvt_roundph_epu8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_ipcvts_roundph_epu8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvt_roundph_epu8 +__m512i test_mm512_mask_ipcvts_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_roundph_epu8 // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs512 - return _mm512_mask_ipcvt_roundph_epu8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvts_roundph_epu8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvt_roundph_epu8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvt_roundph_epu8 +__m512i test_mm512_maskz_ipcvts_roundph_epu8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_roundph_epu8 // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs512 - return _mm512_maskz_ipcvt_roundph_epu8(__A, __B, _MM_FROUND_TO_ZERO | 
_MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvts_roundph_epu8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_ipcvtps_epi8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvtps_epi8( +__m512i test_mm512_ipcvts_ps_epi8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs512 - return _mm512_ipcvtps_epi8(__A); + return _mm512_ipcvts_ps_epi8(__A); } -__m512i test_mm512_mask_ipcvtps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtps_epi8( +__m512i test_mm512_mask_ipcvts_ps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs512 - return _mm512_mask_ipcvtps_epi8(__S, __A, __B); + return _mm512_mask_ipcvts_ps_epi8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvtps_epi8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtps_epi8( +__m512i test_mm512_maskz_ipcvts_ps_epi8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs512 - return _mm512_maskz_ipcvtps_epi8(__A, __B); + return _mm512_maskz_ipcvts_ps_epi8(__A, __B); } -__m512i test_mm512_ipcvt_roundps_epi8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvt_roundps_epi8( +__m512i test_mm512_ipcvts_roundps_epi8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvts_roundps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs512 - return _mm512_ipcvt_roundps_epi8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_ipcvts_roundps_epi8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvt_roundps_epi8 +__m512i test_mm512_mask_ipcvts_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_roundps_epi8 // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs512 - return 
_mm512_mask_ipcvt_roundps_epi8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvts_roundps_epi8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvt_roundps_epi8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvt_roundps_epi8 +__m512i test_mm512_maskz_ipcvts_roundps_epi8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_roundps_epi8 // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs512 - return _mm512_maskz_ipcvt_roundps_epi8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvts_roundps_epi8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_ipcvtps_epu8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvtps_epu8( +__m512i test_mm512_ipcvts_ps_epu8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs512 - return _mm512_ipcvtps_epu8(__A); + return _mm512_ipcvts_ps_epu8(__A); } -__m512i test_mm512_mask_ipcvtps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtps_epu8( +__m512i test_mm512_mask_ipcvts_ps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs512 - return _mm512_mask_ipcvtps_epu8(__S, __A, __B); + return _mm512_mask_ipcvts_ps_epu8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvtps_epu8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtps_epu8( +__m512i test_mm512_maskz_ipcvts_ps_epu8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs512 - return _mm512_maskz_ipcvtps_epu8(__A, __B); + return _mm512_maskz_ipcvts_ps_epu8(__A, __B); } -__m512i test_mm512_ipcvt_roundps_epu8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvt_roundps_epu8( +__m512i test_mm512_ipcvts_roundps_epu8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvts_roundps_epu8( // CHECK: 
@llvm.x86.avx10.mask.vcvtps2iubs512 - return _mm512_ipcvt_roundps_epu8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_ipcvts_roundps_epu8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvt_roundps_epu8 +__m512i test_mm512_mask_ipcvts_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvts_roundps_epu8 // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs512 - return _mm512_mask_ipcvt_roundps_epu8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvts_roundps_epu8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvt_roundps_epu8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvt_roundps_epu8 +__m512i test_mm512_maskz_ipcvts_roundps_epu8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvts_roundps_epu8 // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs512 - return _mm512_maskz_ipcvt_roundps_epu8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvts_roundps_epu8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } -__m512i test_mm512_ipcvttbf16_epi8(__m512bh __A) { - // CHECK-LABEL: @test_mm512_ipcvttbf16_epi8( +__m512i test_mm512_ipcvtts_bf16_epi8(__m512bh __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs512( - return _mm512_ipcvttbf16_epi8(__A); + return _mm512_ipcvtts_bf16_epi8(__A); } -__m512i test_mm512_mask_ipcvttbf16_epi8(__m512i __S, __mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvttbf16_epi8( +__m512i test_mm512_mask_ipcvtts_bf16_epi8(__m512i __S, __mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs512( // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_mask_ipcvttbf16_epi8(__S, __A, __B); + 
return _mm512_mask_ipcvtts_bf16_epi8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvttbf16_epi8(__mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvttbf16_epi8 +__m512i test_mm512_maskz_ipcvtts_bf16_epi8(__mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_bf16_epi8 // CHECK: @llvm.x86.avx10.vcvttbf162ibs512( // CHECK: zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_maskz_ipcvttbf16_epi8(__A, __B); + return _mm512_maskz_ipcvtts_bf16_epi8(__A, __B); } -__m512i test_mm512_ipcvttbf16_epu8(__m512bh __A) { - // CHECK-LABEL: @test_mm512_ipcvttbf16_epu8( +__m512i test_mm512_ipcvtts_bf16_epu8(__m512bh __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs512( - return _mm512_ipcvttbf16_epu8(__A); + return _mm512_ipcvtts_bf16_epu8(__A); } -__m512i test_mm512_mask_ipcvttbf16_epu8(__m512i __S, __mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvttbf16_epu8( +__m512i test_mm512_mask_ipcvtts_bf16_epu8(__m512i __S, __mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs512( // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_mask_ipcvttbf16_epu8(__S, __A, __B); + return _mm512_mask_ipcvtts_bf16_epu8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvttbf16_epu8(__mmask32 __A, __m512bh __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvttbf16_epu8 +__m512i test_mm512_maskz_ipcvtts_bf16_epu8(__mmask32 __A, __m512bh __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_bf16_epu8 // CHECK: @llvm.x86.avx10.vcvttbf162iubs512( // CHECK: zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} - return _mm512_maskz_ipcvttbf16_epu8(__A, __B); + return _mm512_maskz_ipcvtts_bf16_epu8(__A, __B); } -__m512i test_mm512_ipcvttph_epi8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvttph_epi8( +__m512i 
test_mm512_ipcvtts_ph_epi8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs512 - return _mm512_ipcvttph_epi8(__A); + return _mm512_ipcvtts_ph_epi8(__A); } -__m512i test_mm512_mask_ipcvttph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvttph_epi8( +__m512i test_mm512_mask_ipcvtts_ph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs512 - return _mm512_mask_ipcvttph_epi8(__S, __A, __B); + return _mm512_mask_ipcvtts_ph_epi8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvttph_epi8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvttph_epi8 +__m512i test_mm512_maskz_ipcvtts_ph_epi8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_ph_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs512 - return _mm512_maskz_ipcvttph_epi8(__A, __B); + return _mm512_maskz_ipcvtts_ph_epi8(__A, __B); } -__m512i test_mm512_ipcvtt_roundph_epi8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvtt_roundph_epi8 +__m512i test_mm512_ipcvtts_roundph_epi8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_roundph_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs512 - return _mm512_ipcvtt_roundph_epi8(__A, _MM_FROUND_NO_EXC); + return _mm512_ipcvtts_roundph_epi8(__A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvtt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtt_roundph_epi8 +__m512i test_mm512_mask_ipcvtts_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_roundph_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs512 - return _mm512_mask_ipcvtt_roundph_epi8(__S, __A, __B, _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvtts_roundph_epi8(__S, __A, __B, _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvtt_roundph_epi8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: 
@test_mm512_maskz_ipcvtt_roundph_epi8 +__m512i test_mm512_maskz_ipcvtts_roundph_epi8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_roundph_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs512 - return _mm512_maskz_ipcvtt_roundph_epi8(__A, __B, _MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvtts_roundph_epi8(__A, __B, _MM_FROUND_NO_EXC); } -__m512i test_mm512_ipcvttph_epu8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvttph_epu8( +__m512i test_mm512_ipcvtts_ph_epu8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs512 - return _mm512_ipcvttph_epu8(__A); + return _mm512_ipcvtts_ph_epu8(__A); } -__m512i test_mm512_mask_ipcvttph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvttph_epu8( +__m512i test_mm512_mask_ipcvtts_ph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs512 - return _mm512_mask_ipcvttph_epu8(__S, __A, __B); + return _mm512_mask_ipcvtts_ph_epu8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvttph_epu8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvttph_epu8 +__m512i test_mm512_maskz_ipcvtts_ph_epu8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_ph_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs512 - return _mm512_maskz_ipcvttph_epu8(__A, __B); + return _mm512_maskz_ipcvtts_ph_epu8(__A, __B); } -__m512i test_mm512_ipcvtt_roundph_epu8(__m512h __A) { - // CHECK-LABEL: @test_mm512_ipcvtt_roundph_epu8 +__m512i test_mm512_ipcvtts_roundph_epu8(__m512h __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_roundph_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs512 - return _mm512_ipcvtt_roundph_epu8(__A, _MM_FROUND_NO_EXC); + return _mm512_ipcvtts_roundph_epu8(__A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvtt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { - // CHECK-LABEL: 
@test_mm512_mask_ipcvtt_roundph_epu8 +__m512i test_mm512_mask_ipcvtts_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_roundph_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs512 - return _mm512_mask_ipcvtt_roundph_epu8(__S, __A, __B, _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvtts_roundph_epu8(__S, __A, __B, _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvtt_roundph_epu8(__mmask32 __A, __m512h __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtt_roundph_epu8 +__m512i test_mm512_maskz_ipcvtts_roundph_epu8(__mmask32 __A, __m512h __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_roundph_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs512 - return _mm512_maskz_ipcvtt_roundph_epu8(__A, __B, _MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvtts_roundph_epu8(__A, __B, _MM_FROUND_NO_EXC); } -__m512i test_mm512_ipcvttps_epi8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvttps_epi8( +__m512i test_mm512_ipcvtts_ps_epi8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs512 - return _mm512_ipcvttps_epi8(__A); + return _mm512_ipcvtts_ps_epi8(__A); } -__m512i test_mm512_mask_ipcvttps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvttps_epi8( +__m512i test_mm512_mask_ipcvtts_ps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs512 - return _mm512_mask_ipcvttps_epi8(__S, __A, __B); + return _mm512_mask_ipcvtts_ps_epi8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvttps_epi8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvttps_epi8 +__m512i test_mm512_maskz_ipcvtts_ps_epi8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_ps_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs512 - return _mm512_maskz_ipcvttps_epi8(__A, __B); + return _mm512_maskz_ipcvtts_ps_epi8(__A, __B); } -__m512i 
test_mm512_ipcvtt_roundps_epi8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvtt_roundps_epi8 +__m512i test_mm512_ipcvtts_roundps_epi8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_roundps_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs512 - return _mm512_ipcvtt_roundps_epi8(__A, _MM_FROUND_NO_EXC); + return _mm512_ipcvtts_roundps_epi8(__A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvtt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtt_roundps_epi8 +__m512i test_mm512_mask_ipcvtts_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_roundps_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs512 - return _mm512_mask_ipcvtt_roundps_epi8(__S, __A, __B, _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvtts_roundps_epi8(__S, __A, __B, _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvtt_roundps_epi8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtt_roundps_epi8 +__m512i test_mm512_maskz_ipcvtts_roundps_epi8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_roundps_epi8 // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs512 - return _mm512_maskz_ipcvtt_roundps_epi8(__A, __B, _MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvtts_roundps_epi8(__A, __B, _MM_FROUND_NO_EXC); } -__m512i test_mm512_ipcvttps_epu8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvttps_epu8( +__m512i test_mm512_ipcvtts_ps_epu8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs512 - return _mm512_ipcvttps_epu8(__A); + return _mm512_ipcvtts_ps_epu8(__A); } -__m512i test_mm512_mask_ipcvttps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvttps_epu8( +__m512i test_mm512_mask_ipcvtts_ps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs512 - return _mm512_mask_ipcvttps_epu8(__S, __A, __B); + 
return _mm512_mask_ipcvtts_ps_epu8(__S, __A, __B); } -__m512i test_mm512_maskz_ipcvttps_epu8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvttps_epu8 +__m512i test_mm512_maskz_ipcvtts_ps_epu8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_ps_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs512 - return _mm512_maskz_ipcvttps_epu8(__A, __B); + return _mm512_maskz_ipcvtts_ps_epu8(__A, __B); } -__m512i test_mm512_ipcvtt_roundps_epu8(__m512 __A) { - // CHECK-LABEL: @test_mm512_ipcvtt_roundps_epu8 +__m512i test_mm512_ipcvtts_roundps_epu8(__m512 __A) { + // CHECK-LABEL: @test_mm512_ipcvtts_roundps_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs512 - return _mm512_ipcvtt_roundps_epu8(__A, _MM_FROUND_NO_EXC); + return _mm512_ipcvtts_roundps_epu8(__A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_mask_ipcvtt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_mask_ipcvtt_roundps_epu8 +__m512i test_mm512_mask_ipcvtts_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_ipcvtts_roundps_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs512 - return _mm512_mask_ipcvtt_roundps_epu8(__S, __A, __B, _MM_FROUND_NO_EXC); + return _mm512_mask_ipcvtts_roundps_epu8(__S, __A, __B, _MM_FROUND_NO_EXC); } -__m512i test_mm512_maskz_ipcvtt_roundps_epu8(__mmask16 __A, __m512 __B) { - // CHECK-LABEL: @test_mm512_maskz_ipcvtt_roundps_epu8 +__m512i test_mm512_maskz_ipcvtts_roundps_epu8(__mmask16 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_ipcvtts_roundps_epu8 // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs512 - return _mm512_maskz_ipcvtt_roundps_epu8(__A, __B, _MM_FROUND_NO_EXC); + return _mm512_maskz_ipcvtts_roundps_epu8(__A, __B, _MM_FROUND_NO_EXC); } diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c index 8c8959a03d7bd..1aaa6544d1f9c 100644 --- 
a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins-x64.c @@ -3,58 +3,58 @@ #include #include -long long test_mm_cvttssd_si64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttssd_si64( +long long test_mm_cvtts_sd_si64(__m128d __A) { + // CHECK-LABEL: @test_mm_cvtts_sd_si64( // CHECK: @llvm.x86.avx10.vcvttsd2sis64(<2 x double> return _mm_cvtts_roundsd_si64(__A, _MM_FROUND_NO_EXC); } -long long test_mm_cvttssd_i64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttssd_i64( +long long test_mm_cvtts_sd_i64(__m128d __A) { + // CHECK-LABEL: @test_mm_cvtts_sd_i64( // CHECK: @llvm.x86.avx10.vcvttsd2sis64(<2 x double> return _mm_cvtts_roundsd_i64(__A, _MM_FROUND_NO_EXC); } -unsigned long long test_mm_cvttssd_u64(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttssd_u64( +unsigned long long test_mm_cvtts_sd_u64(__m128d __A) { + // CHECK-LABEL: @test_mm_cvtts_sd_u64( // CHECK: @llvm.x86.avx10.vcvttsd2usis64(<2 x double> return _mm_cvtts_roundsd_u64(__A, _MM_FROUND_NO_EXC); } -float test_mm_cvttsss_i64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttsss_i64( +float test_mm_cvtts_ss_i64(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtts_ss_i64( // CHECK: @llvm.x86.avx10.vcvttss2sis64(<4 x float> return _mm_cvtts_roundss_i64(__A, _MM_FROUND_NO_EXC); } -long long test_mm_cvttsss_si64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttsss_si64( +long long test_mm_cvtts_ss_si64(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtts_ss_si64( // CHECK: @llvm.x86.avx10.vcvttss2sis64(<4 x float> return _mm_cvtts_roundss_si64(__A, _MM_FROUND_NO_EXC); } -unsigned long long test_mm_cvttsss_u64(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttsss_u64( +unsigned long long test_mm_cvtts_ss_u64(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtts_ss_u64( // CHECK: @llvm.x86.avx10.vcvttss2usis64(<4 x float> return _mm_cvtts_roundss_u64(__A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_cvttspd_epi64(__m512d A) { - // CHECK-LABEL: test_mm512_cvttspd_epi64 +__m512i 
test_mm512_cvtts_pd_epi64(__m512d A) { + // CHECK-LABEL: test_mm512_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double> - return _mm512_cvttspd_epi64(A); + return _mm512_cvtts_pd_epi64(A); } -__m512i test_mm512_mask_cvttspd_epi64(__m512i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: test_mm512_mask_cvttspd_epi64 +__m512i test_mm512_mask_cvtts_pd_epi64(__m512i W, __mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_mask_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double> - return _mm512_mask_cvttspd_epi64(W, U, A); + return _mm512_mask_cvtts_pd_epi64(W, U, A); } -__m512i test_mm512_maskz_cvttspd_epi64(__mmask8 U, __m512d A) { - // CHECK-LABEL: test_mm512_maskz_cvttspd_epi64 +__m512i test_mm512_maskz_cvtts_pd_epi64(__mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.512(<8 x double> - return _mm512_maskz_cvttspd_epi64(U, A); + return _mm512_maskz_cvtts_pd_epi64(U, A); } __m512i test_mm512_cvtts_roundpd_epi64(__m512d A) { @@ -75,22 +75,22 @@ __m512i test_mm512_maskz_cvtts_roundpd_epi64(__mmask8 U, __m512d A) { return _mm512_maskz_cvtts_roundpd_epi64(U, A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_cvttspd_epu64(__m512d A) { - // CHECK-LABEL: test_mm512_cvttspd_epu64 +__m512i test_mm512_cvtts_pd_epu64(__m512d A) { + // CHECK-LABEL: test_mm512_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double> - return _mm512_cvttspd_epu64(A); + return _mm512_cvtts_pd_epu64(A); } -__m512i test_mm512_mask_cvttspd_epu64(__m512i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: test_mm512_mask_cvttspd_epu64 +__m512i test_mm512_mask_cvtts_pd_epu64(__m512i W, __mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_mask_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double> - return _mm512_mask_cvttspd_epu64(W, U, A); + return _mm512_mask_cvtts_pd_epu64(W, U, A); } -__m512i test_mm512_maskz_cvttspd_epu64(__mmask8 
U, __m512d A) { - // CHECK-LABEL: test_mm512_maskz_cvttspd_epu64 +__m512i test_mm512_maskz_cvtts_pd_epu64(__mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.512(<8 x double> - return _mm512_maskz_cvttspd_epu64(U, A); + return _mm512_maskz_cvtts_pd_epu64(U, A); } __m512i test_mm512_cvtts_roundpd_epu64(__m512d A) { @@ -111,22 +111,22 @@ __m512i test_mm512_maskz_cvtts_roundpd_epu64(__mmask8 U, __m512d A) { return _mm512_maskz_cvtts_roundpd_epu64(U, A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_cvttsps_epi64(__m256 A) { - // CHECK-LABEL: test_mm512_cvttsps_epi64 +__m512i test_mm512_cvtts_ps_epi64(__m256 A) { + // CHECK-LABEL: test_mm512_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float> - return _mm512_cvttsps_epi64(A); + return _mm512_cvtts_ps_epi64(A); } -__m512i test_mm512_mask_cvttsps_epi64(__m512i W, __mmask8 U, __m256 A) { - // CHECK-LABEL: test_mm512_mask_cvttsps_epi64 +__m512i test_mm512_mask_cvtts_ps_epi64(__m512i W, __mmask8 U, __m256 A) { + // CHECK-LABEL: test_mm512_mask_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float> - return _mm512_mask_cvttsps_epi64(W, U, A); + return _mm512_mask_cvtts_ps_epi64(W, U, A); } -__m512i test_mm512_maskz_cvttsps_epi64(__mmask8 U, __m256 A) { - // CHECK-LABEL: test_mm512_maskz_cvttsps_epi64 +__m512i test_mm512_maskz_cvtts_ps_epi64(__mmask8 U, __m256 A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.512(<8 x float> - return _mm512_maskz_cvttsps_epi64(U, A); + return _mm512_maskz_cvtts_ps_epi64(U, A); } __m512i test_mm512_cvtts_roundps_epi64(__m256 A) { @@ -147,22 +147,22 @@ __m512i test_mm512_maskz_cvtts_roundps_epi64(__mmask8 U, __m256 A) { return _mm512_maskz_cvtts_roundps_epi64(U, A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_cvttsps_epu64(__m256 A) { - // CHECK-LABEL: test_mm512_cvttsps_epu64 +__m512i test_mm512_cvtts_ps_epu64(__m256 A) { + 
// CHECK-LABEL: test_mm512_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float> - return _mm512_cvttsps_epu64(A); + return _mm512_cvtts_ps_epu64(A); } -__m512i test_mm512_mask_cvttsps_epu64(__m512i W, __mmask8 U, __m256 A) { - // CHECK-LABEL: test_mm512_mask_cvttsps_epu64 +__m512i test_mm512_mask_cvtts_ps_epu64(__m512i W, __mmask8 U, __m256 A) { + // CHECK-LABEL: test_mm512_mask_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float> - return _mm512_mask_cvttsps_epu64(W, U, A); + return _mm512_mask_cvtts_ps_epu64(W, U, A); } -__m512i test_mm512_maskz_cvttsps_epu64(__mmask8 U, __m256 A) { - // CHECK-LABEL: test_mm512_maskz_cvttsps_epu64 +__m512i test_mm512_maskz_cvtts_ps_epu64(__mmask8 U, __m256 A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.512(<8 x float> - return _mm512_maskz_cvttsps_epu64(U, A); + return _mm512_maskz_cvtts_ps_epu64(U, A); } __m512i test_mm512_cvtts_roundps_epu64(__m256 A) { diff --git a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c index cccee04627d22..c1b6df3cb07f5 100644 --- a/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2_512satcvtds-builtins.c @@ -4,22 +4,22 @@ #include #include -__m256i test_mm512_cvttspd_epi32(__m512d A) { - // CHECK-LABEL: test_mm512_cvttspd_epi32 +__m256i test_mm512_cvtts_pd_epi32(__m512d A) { + // CHECK-LABEL: test_mm512_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double> - return _mm512_cvttspd_epi32(A); + return _mm512_cvtts_pd_epi32(A); } -__m256i test_mm512_mask_cvttspd_epi32(__m256i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: test_mm512_mask_cvttspd_epi32 +__m256i test_mm512_mask_cvtts_pd_epi32(__m256i W, __mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_mask_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double> - return 
_mm512_mask_cvttspd_epi32(W, U, A); + return _mm512_mask_cvtts_pd_epi32(W, U, A); } -__m256i test_mm512_maskz_cvttspd_epi32(__mmask8 U, __m512d A) { - // CHECK-LABEL: test_mm512_maskz_cvttspd_epi32 +__m256i test_mm512_maskz_cvtts_pd_epi32(__mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.512(<8 x double> - return _mm512_maskz_cvttspd_epi32(U, A); + return _mm512_maskz_cvtts_pd_epi32(U, A); } __m256i test_mm512_cvtts_roundpd_epi32(__m512d A) { @@ -40,22 +40,22 @@ __m256i test_mm512_maskz_cvtts_roundpd_epi32(__mmask8 U, __m512d A) { return _mm512_maskz_cvtts_roundpd_epi32(U, A, _MM_FROUND_NO_EXC); } -__m256i test_mm512_cvttspd_epu32(__m512d A) { - // CHECK-LABEL: test_mm512_cvttspd_epu32 +__m256i test_mm512_cvtts_pd_epu32(__m512d A) { + // CHECK-LABEL: test_mm512_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double> - return _mm512_cvttspd_epu32(A); + return _mm512_cvtts_pd_epu32(A); } -__m256i test_mm512_mask_cvttspd_epu32(__m256i W, __mmask8 U, __m512d A) { - // CHECK-LABEL: test_mm512_mask_cvttspd_epu32 +__m256i test_mm512_mask_cvtts_pd_epu32(__m256i W, __mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_mask_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double> - return _mm512_mask_cvttspd_epu32(W, U, A); + return _mm512_mask_cvtts_pd_epu32(W, U, A); } -__m256i test_mm512_maskz_cvttspd_epu32(__mmask8 U, __m512d A) { - // CHECK-LABEL: test_mm512_maskz_cvttspd_epu32 +__m256i test_mm512_maskz_cvtts_pd_epu32(__mmask8 U, __m512d A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.512(<8 x double> - return _mm512_maskz_cvttspd_epu32(U, A); + return _mm512_maskz_cvtts_pd_epu32(U, A); } __m256i test_mm512_cvtts_roundpd_epu32(__m512d A) { @@ -76,22 +76,22 @@ __m256i test_mm512_maskz_cvtts_roundpd_epu32(__mmask8 U, __m512d A) { return _mm512_maskz_cvtts_roundpd_epu32(U, A, 
_MM_FROUND_NO_EXC); } -__m512i test_mm512_cvttsps_epi32(__m512 A) { - // CHECK-LABEL: test_mm512_cvttsps_epi32 +__m512i test_mm512_cvtts_ps_epi32(__m512 A) { + // CHECK-LABEL: test_mm512_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float> - return _mm512_cvttsps_epi32(A); + return _mm512_cvtts_ps_epi32(A); } -__m512i test_mm512_mask_cvttsps_epi32(__m512i W, __mmask8 U, __m512 A) { - // CHECK-LABEL: test_mm512_mask_cvttsps_epi32 +__m512i test_mm512_mask_cvtts_ps_epi32(__m512i W, __mmask8 U, __m512 A) { + // CHECK-LABEL: test_mm512_mask_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float> - return _mm512_mask_cvttsps_epi32(W, U, A); + return _mm512_mask_cvtts_ps_epi32(W, U, A); } -__m512i test_mm512_maskz_cvttsps_epi32(__mmask8 U, __m512 A) { - // CHECK-LABEL: test_mm512_maskz_cvttsps_epi32 +__m512i test_mm512_maskz_cvtts_ps_epi32(__mmask8 U, __m512 A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.512(<16 x float> - return _mm512_maskz_cvttsps_epi32(U, A); + return _mm512_maskz_cvtts_ps_epi32(U, A); } __m512i test_mm512_cvtts_roundps_epi32(__m512 A) { @@ -112,22 +112,22 @@ __m512i test_mm512_maskz_cvtts_roundps_epi32(__mmask8 U, __m512 A) { return _mm512_maskz_cvtts_roundps_epi32(U, A, _MM_FROUND_NO_EXC); } -__m512i test_mm512_cvttsps_epu32(__m512 A) { - // CHECK-LABEL: test_mm512_cvttsps_epu32 +__m512i test_mm512_cvtts_ps_epu32(__m512 A) { + // CHECK-LABEL: test_mm512_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float> - return _mm512_cvttsps_epu32(A); + return _mm512_cvtts_ps_epu32(A); } -__m512i test_mm512_mask_cvttsps_epu32(__m512i W, __mmask8 U, __m512 A) { - // CHECK-LABEL: test_mm512_mask_cvttsps_epu32 +__m512i test_mm512_mask_cvtts_ps_epu32(__m512i W, __mmask8 U, __m512 A) { + // CHECK-LABEL: test_mm512_mask_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float> - return 
_mm512_mask_cvttsps_epu32(W, U, A); + return _mm512_mask_cvtts_ps_epu32(W, U, A); } -__m512i test_mm512_maskz_cvttsps_epu32(__mmask8 U, __m512 A) { - // CHECK-LABEL: test_mm512_maskz_cvttsps_epu32 +__m512i test_mm512_maskz_cvtts_ps_epu32(__mmask8 U, __m512 A) { + // CHECK-LABEL: test_mm512_maskz_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.512(<16 x float> - return _mm512_maskz_cvttsps_epu32(U, A); + return _mm512_maskz_cvtts_ps_epu32(U, A); } __m512i test_mm512_cvtts_roundps_epu32(__m512 A) { @@ -148,4 +148,4 @@ __m512i test_mm512_maskz_cvtts_roundps_epu32(__mmask8 U, __m512 A) { } // X64: {{.*}} -// X86: {{.*}} \ No newline at end of file +// X86: {{.*}} diff --git a/clang/test/CodeGen/X86/avx10_2convert-builtins.c b/clang/test/CodeGen/X86/avx10_2convert-builtins.c index 87fc6ffd7bc17..31dd0ecc381ef 100644 --- a/clang/test/CodeGen/X86/avx10_2convert-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2convert-builtins.c @@ -41,24 +41,6 @@ __m256h test_mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { return _mm256_maskz_cvtx2ps_ph(__U, __A, __B); } -__m256h test_mm256_cvtx_round2ps_ph(__m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_cvtx_round2ps_ph( - // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256( - return _mm256_cvtx_round2ps_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_cvtx_round2ps_ph(__m256h __W, __mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_cvtx_round2ps_ph( - // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256( - return _mm256_mask_cvtx_round2ps_ph(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_cvtx_round2ps_ph(__mmask8 __U, __m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_cvtx_round2ps_ph( - // CHECK: call <16 x half> @llvm.x86.avx10.mask.vcvt2ps2phx.256( - return _mm256_maskz_cvtx_round2ps_ph(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - __m128i 
test_mm_cvtbiasph_bf8(__m128i __A, __m128h __B) { // CHECK-LABEL: @test_mm_cvtbiasph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8128( @@ -95,40 +77,40 @@ __m128i test_mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) return _mm256_maskz_cvtbiasph_bf8(__U, __A, __B); } -__m128i test_mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { - // CHECK-LABEL: @test_mm_cvtbiassph_bf8( +__m128i test_mm_cvts_biasph_bf8(__m128i __A, __m128h __B) { + // CHECK-LABEL: @test_mm_cvts_biasph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s128( - return _mm_cvtbiassph_bf8(__A, __B); + return _mm_cvts_biasph_bf8(__A, __B); } -__m128i test_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_cvtbiassph_bf8( +__m128i test_mm_mask_cvts_biasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_cvts_biasph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s128( - return _mm_mask_cvtbiassph_bf8(__W, __U, __A, __B); + return _mm_mask_cvts_biasph_bf8(__W, __U, __A, __B); } -__m128i test_mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_cvtbiassph_bf8( +__m128i test_mm_maskz_cvts_biasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cvts_biasph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s128( - return _mm_maskz_cvtbiassph_bf8(__U, __A, __B); + return _mm_maskz_cvts_biasph_bf8(__U, __A, __B); } -__m128i test_mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_cvtbiassph_bf8( +__m128i test_mm256_cvts_biasph_bf8(__m256i __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_cvts_biasph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s256( - return _mm256_cvtbiassph_bf8(__A, __B); + return _mm256_cvts_biasph_bf8(__A, __B); } -__m128i test_mm256_mask_cvtbiassph_bf8(__m128i __W, __mmask16 __U, __m256i 
__A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_cvtbiassph_bf8( +__m128i test_mm256_mask_cvts_biasph_bf8(__m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_cvts_biasph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s256( - return _mm256_mask_cvtbiassph_bf8(__W, __U, __A, __B); + return _mm256_mask_cvts_biasph_bf8(__W, __U, __A, __B); } -__m128i test_mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_cvtbiassph_bf8( +__m128i test_mm256_maskz_cvts_biasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_cvts_biasph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2bf8s256( - return _mm256_maskz_cvtbiassph_bf8(__U, __A, __B); + return _mm256_maskz_cvts_biasph_bf8(__U, __A, __B); } __m128i test_mm_cvtbiasph_hf8(__m128i __A, __m128h __B) { @@ -167,40 +149,40 @@ __m128i test_mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) return _mm256_maskz_cvtbiasph_hf8(__U, __A, __B); } -__m128i test_mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { - // CHECK-LABEL: @test_mm_cvtbiassph_hf8( +__m128i test_mm_cvts_biasph_hf8(__m128i __A, __m128h __B) { + // CHECK-LABEL: @test_mm_cvts_biasph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s128( - return _mm_cvtbiassph_hf8(__A, __B); + return _mm_cvts_biasph_hf8(__A, __B); } -__m128i test_mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_cvtbiassph_hf8( +__m128i test_mm_mask_cvts_biasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_cvts_biasph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s128( - return _mm_mask_cvtbiassph_hf8(__W, __U, __A, __B); + return _mm_mask_cvts_biasph_hf8(__W, __U, __A, __B); } -__m128i test_mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { - // CHECK-LABEL: 
@test_mm_maskz_cvtbiassph_hf8( +__m128i test_mm_maskz_cvts_biasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cvts_biasph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s128( - return _mm_maskz_cvtbiassph_hf8(__U, __A, __B); + return _mm_maskz_cvts_biasph_hf8(__U, __A, __B); } -__m128i test_mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_cvtbiassph_hf8( +__m128i test_mm256_cvts_biasph_hf8(__m256i __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_cvts_biasph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s256( - return _mm256_cvtbiassph_hf8(__A, __B); + return _mm256_cvts_biasph_hf8(__A, __B); } -__m128i test_mm256_mask_cvtbiassph_hf8(__m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_cvtbiassph_hf8( +__m128i test_mm256_mask_cvts_biasph_hf8(__m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_cvts_biasph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s256( - return _mm256_mask_cvtbiassph_hf8(__W, __U, __A, __B); + return _mm256_mask_cvts_biasph_hf8(__W, __U, __A, __B); } -__m128i test_mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_cvtbiassph_hf8( +__m128i test_mm256_maskz_cvts_biasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_cvts_biasph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtbiasph2hf8s256( - return _mm256_maskz_cvtbiassph_hf8(__U, __A, __B); + return _mm256_maskz_cvts_biasph_hf8(__U, __A, __B); } __m128i test_mm_cvt2ph_bf8(__m128h __A, __m128h __B) { @@ -247,48 +229,48 @@ __m256i test_mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return _mm256_maskz_cvt2ph_bf8(__U, __A, __B); } -__m128i test_mm_cvts2ph_bf8(__m128h __A, __m128h __B) { - // CHECK-LABEL: @test_mm_cvts2ph_bf8( +__m128i test_mm_cvts_2ph_bf8(__m128h __A, __m128h __B) { + // 
CHECK-LABEL: @test_mm_cvts_2ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.vcvt2ph2bf8s128( - return _mm_cvts2ph_bf8(__A, __B); + return _mm_cvts_2ph_bf8(__A, __B); } -__m128i test_mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_cvts2ph_bf8( +__m128i test_mm_mask_cvts_2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_cvts_2ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.vcvt2ph2bf8s128( // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} // CHECK: ret <2 x i64> %{{.*}} - return _mm_mask_cvts2ph_bf8(__W, __U, __A, __B); + return _mm_mask_cvts_2ph_bf8(__W, __U, __A, __B); } -__m128i test_mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_cvts2ph_bf8( +__m128i test_mm_maskz_cvts_2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cvts_2ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.vcvt2ph2bf8s128( // CHECK: zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} - return _mm_maskz_cvts2ph_bf8(__U, __A, __B); + return _mm_maskz_cvts_2ph_bf8(__U, __A, __B); } -__m256i test_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_cvts2ph_bf8( +__m256i test_mm256_cvts_2ph_bf8(__m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_cvts_2ph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2bf8s256( - return _mm256_cvts2ph_bf8(__A, __B); + return _mm256_cvts_2ph_bf8(__A, __B); } -__m256i test_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_cvts2ph_bf8( +__m256i test_mm256_mask_cvts_2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_cvts_2ph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2bf8s256( // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} // CHECK: ret <4 x i64> 
%{{.*}} - return _mm256_mask_cvts2ph_bf8(__W, __U, __A, __B); + return _mm256_mask_cvts_2ph_bf8(__W, __U, __A, __B); } -__m256i test_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_cvts2ph_bf8( +__m256i test_mm256_maskz_cvts_2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_cvts_2ph_bf8( // CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2bf8s256( // CHECK: zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} - return _mm256_maskz_cvts2ph_bf8(__U, __A, __B); + return _mm256_maskz_cvts_2ph_bf8(__U, __A, __B); } __m128i test_mm_cvt2ph_hf8(__m128h __A, __m128h __B) { @@ -335,48 +317,48 @@ __m256i test_mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return _mm256_maskz_cvt2ph_hf8(__U, __A, __B); } -__m128i test_mm_cvts2ph_hf8(__m128h __A, __m128h __B) { - // CHECK-LABEL: @test_mm_cvts2ph_hf8( +__m128i test_mm_cvts_2ph_hf8(__m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_cvts_2ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.vcvt2ph2hf8s128( - return _mm_cvts2ph_hf8(__A, __B); + return _mm_cvts_2ph_hf8(__A, __B); } -__m128i test_mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_cvts2ph_hf8( +__m128i test_mm_mask_cvts_2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_cvts_2ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.vcvt2ph2hf8s128( // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} // CHECK: ret <2 x i64> %{{.*}} - return _mm_mask_cvts2ph_hf8(__W, __U, __A, __B); + return _mm_mask_cvts_2ph_hf8(__W, __U, __A, __B); } -__m128i test_mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_cvts2ph_hf8( +__m128i test_mm_maskz_cvts_2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cvts_2ph_hf8( // CHECK: call <16 x i8> 
@llvm.x86.avx10.vcvt2ph2hf8s128( // CHECK: zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} - return _mm_maskz_cvts2ph_hf8(__U, __A, __B); + return _mm_maskz_cvts_2ph_hf8(__U, __A, __B); } -__m256i test_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_cvts2ph_hf8( +__m256i test_mm256_cvts_2ph_hf8(__m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_cvts_2ph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2hf8s256( - return _mm256_cvts2ph_hf8(__A, __B); + return _mm256_cvts_2ph_hf8(__A, __B); } -__m256i test_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_cvts2ph_hf8( +__m256i test_mm256_mask_cvts_2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_cvts_2ph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2hf8s256( // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} // CHECK: ret <4 x i64> %{{.*}} - return _mm256_mask_cvts2ph_hf8(__W, __U, __A, __B); + return _mm256_mask_cvts_2ph_hf8(__W, __U, __A, __B); } -__m256i test_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_cvts2ph_hf8( +__m256i test_mm256_maskz_cvts_2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_cvts_2ph_hf8( // CHECK: call <32 x i8> @llvm.x86.avx10.vcvt2ph2hf8s256( // CHECK: zeroinitializer // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} - return _mm256_maskz_cvts2ph_hf8(__U, __A, __B); + return _mm256_maskz_cvts_2ph_hf8(__U, __A, __B); } __m128h test_mm_cvthf8_ph(__m128i __A) { @@ -451,40 +433,40 @@ __m128i test_mm256_maskz_cvtph_bf8(__mmask16 __A, __m256h __B) { return _mm256_maskz_cvtph_bf8(__A, __B); } -__m128i test_mm_cvtsph_bf8(__m128h __A) { - // CHECK-LABEL: @test_mm_cvtsph_bf8( +__m128i test_mm_cvts_ph_bf8(__m128h __A) { + // CHECK-LABEL: @test_mm_cvts_ph_bf8( // 
CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s128( - return _mm_cvtsph_bf8(__A); + return _mm_cvts_ph_bf8(__A); } -__m128i test_mm_mask_cvtsph_bf8(__m128i __A, __mmask8 __B, __m128h __C) { - // CHECK-LABEL: @test_mm_mask_cvtsph_bf8( +__m128i test_mm_mask_cvts_ph_bf8(__m128i __A, __mmask8 __B, __m128h __C) { + // CHECK-LABEL: @test_mm_mask_cvts_ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s128( - return _mm_mask_cvtsph_bf8(__A, __B, __C); + return _mm_mask_cvts_ph_bf8(__A, __B, __C); } -__m128i test_mm_maskz_cvtsph_bf8(__mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_cvtsph_bf8( +__m128i test_mm_maskz_cvts_ph_bf8(__mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cvts_ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s128( - return _mm_maskz_cvtsph_bf8(__A, __B); + return _mm_maskz_cvts_ph_bf8(__A, __B); } -__m128i test_mm256_cvtsph_bf8(__m256h __A) { - // CHECK-LABEL: @test_mm256_cvtsph_bf8( +__m128i test_mm256_cvts_ph_bf8(__m256h __A) { + // CHECK-LABEL: @test_mm256_cvts_ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s256( - return _mm256_cvtsph_bf8(__A); + return _mm256_cvts_ph_bf8(__A); } -__m128i test_mm256_mask_cvtsph_bf8(__m128i __A, __mmask16 __B, __m256h __C) { - // CHECK-LABEL: @test_mm256_mask_cvtsph_bf8( +__m128i test_mm256_mask_cvts_ph_bf8(__m128i __A, __mmask16 __B, __m256h __C) { + // CHECK-LABEL: @test_mm256_mask_cvts_ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s256( - return _mm256_mask_cvtsph_bf8(__A, __B, __C); + return _mm256_mask_cvts_ph_bf8(__A, __B, __C); } -__m128i test_mm256_maskz_cvtsph_bf8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_cvtsph_bf8( +__m128i test_mm256_maskz_cvts_ph_bf8(__mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_cvts_ph_bf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2bf8s256( - return _mm256_maskz_cvtsph_bf8(__A, __B); + return _mm256_maskz_cvts_ph_bf8(__A, __B); } __m128i 
test_mm_cvtph_hf8(__m128h __A) { @@ -523,40 +505,40 @@ __m128i test_mm256_maskz_cvtph_hf8(__mmask16 __A, __m256h __B) { return _mm256_maskz_cvtph_hf8(__A, __B); } -__m128i test_mm_cvtsph_hf8(__m128h __A) { - // CHECK-LABEL: @test_mm_cvtsph_hf8( +__m128i test_mm_cvts_ph_hf8(__m128h __A) { + // CHECK-LABEL: @test_mm_cvts_ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s128( - return _mm_cvtsph_hf8(__A); + return _mm_cvts_ph_hf8(__A); } -__m128i test_mm_mask_cvtsph_hf8(__m128i __A, __mmask8 __B, __m128h __C) { - // CHECK-LABEL: @test_mm_mask_cvtsph_hf8( +__m128i test_mm_mask_cvts_ph_hf8(__m128i __A, __mmask8 __B, __m128h __C) { + // CHECK-LABEL: @test_mm_mask_cvts_ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s128( - return _mm_mask_cvtsph_hf8(__A, __B, __C); + return _mm_mask_cvts_ph_hf8(__A, __B, __C); } -__m128i test_mm_maskz_cvtsph_hf8(__mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_cvtsph_hf8( +__m128i test_mm_maskz_cvts_ph_hf8(__mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_cvts_ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s128( - return _mm_maskz_cvtsph_hf8(__A, __B); + return _mm_maskz_cvts_ph_hf8(__A, __B); } -__m128i test_mm256_cvtsph_hf8(__m256h __A) { - // CHECK-LABEL: @test_mm256_cvtsph_hf8( +__m128i test_mm256_cvts_ph_hf8(__m256h __A) { + // CHECK-LABEL: @test_mm256_cvts_ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s256( - return _mm256_cvtsph_hf8(__A); + return _mm256_cvts_ph_hf8(__A); } -__m128i test_mm256_mask_cvtsph_hf8(__m128i __A, __mmask16 __B, __m256h __C) { - // CHECK-LABEL: @test_mm256_mask_cvtsph_hf8( +__m128i test_mm256_mask_cvts_ph_hf8(__m128i __A, __mmask16 __B, __m256h __C) { + // CHECK-LABEL: @test_mm256_mask_cvts_ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s256( - return _mm256_mask_cvtsph_hf8(__A, __B, __C); + return _mm256_mask_cvts_ph_hf8(__A, __B, __C); } -__m128i test_mm256_maskz_cvtsph_hf8(__mmask16 __A, __m256h 
__B) { - // CHECK-LABEL: @test_mm256_maskz_cvtsph_hf8( +__m128i test_mm256_maskz_cvts_ph_hf8(__mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_cvts_ph_hf8( // CHECK: call <16 x i8> @llvm.x86.avx10.mask.vcvtph2hf8s256( - return _mm256_maskz_cvtsph_hf8(__A, __B); + return _mm256_maskz_cvts_ph_hf8(__A, __B); } __m256h test_mm256_cvtbf8_ph(__m128i A) { diff --git a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c index 7e21858c71834..f8238f40c15b5 100644 --- a/clang/test/CodeGen/X86/avx10_2minmax-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2minmax-builtins.c @@ -83,24 +83,6 @@ __m256d test_mm256_maskz_minmax_pd(__mmask8 __A, __m256d __B, __m256d __C) { return _mm256_maskz_minmax_pd(__A, __B, __C, 127); } -__m256d test_mm256_minmax_round_pd(__m256d __A, __m256d __B) { - // CHECK-LABEL: @test_mm256_minmax_round_pd( - // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round( - return _mm256_minmax_round_pd(__A, __B, 127, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_minmax_round_pd(__m256d __A, __mmask8 __B, __m256d __C, __m256d __D) { - // CHECK-LABEL: @test_mm256_mask_minmax_round_pd( - // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round( - return _mm256_mask_minmax_round_pd(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_minmax_round_pd(__mmask8 __A, __m256d __B, __m256d __C) { - // CHECK-LABEL: @test_mm256_maskz_minmax_round_pd( - // CHECK: call <4 x double> @llvm.x86.avx10.mask.vminmaxpd256.round( - return _mm256_maskz_minmax_round_pd(__A, __B, __C, 127, _MM_FROUND_NO_EXC); -} - __m128h test_mm_minmax_ph(__m128h __A, __m128h __B) { // CHECK-LABEL: @test_mm_minmax_ph( // CHECK: call <8 x half> @llvm.x86.avx10.mask.vminmaxph128( @@ -137,24 +119,6 @@ __m256h test_mm256_maskz_minmax_ph(__mmask16 __A, __m256h __B, __m256h __C) { return _mm256_maskz_minmax_ph(__A, __B, __C, 127); } -__m256h test_mm256_minmax_round_ph(__m256h __A, __m256h __B) { - // 
CHECK-LABEL: @test_mm256_minmax_round_ph( - // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round( - return _mm256_minmax_round_ph(__A, __B, 127, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_minmax_round_ph(__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) { - // CHECK-LABEL: @test_mm256_mask_minmax_round_ph( - // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round( - return _mm256_mask_minmax_round_ph(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_minmax_round_ph(__mmask16 __A, __m256h __B, __m256h __C) { - // CHECK-LABEL: @test_mm256_maskz_minmax_round_ph( - // CHECK: call <16 x half> @llvm.x86.avx10.mask.vminmaxph256.round( - return _mm256_maskz_minmax_round_ph(__A, __B, __C, 127, _MM_FROUND_NO_EXC); -} - __m128 test_mm_minmax_ps(__m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_minmax_ps( // CHECK: call <4 x float> @llvm.x86.avx10.mask.vminmaxps128( @@ -191,24 +155,6 @@ __m256 test_mm256_maskz_minmax_ps(__mmask8 __A, __m256 __B, __m256 __C) { return _mm256_maskz_minmax_ps(__A, __B, __C, 127); } -__m256 test_mm256_minmax_round_ps(__m256 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_minmax_round_ps( - // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round( - return _mm256_minmax_round_ps(__A, __B, 127, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_minmax_round_ps(__m256 __A, __mmask8 __B, __m256 __C, __m256 __D) { - // CHECK-LABEL: @test_mm256_mask_minmax_round_ps( - // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round( - return _mm256_mask_minmax_round_ps(__A, __B, __C, __D, 127, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_minmax_round_ps(__mmask8 __A, __m256 __B, __m256 __C) { - // CHECK-LABEL: @test_mm256_maskz_minmax_round_ps( - // CHECK: call <8 x float> @llvm.x86.avx10.mask.vminmaxps256.round( - return _mm256_maskz_minmax_round_ps(__A, __B, __C, 127, _MM_FROUND_NO_EXC); -} - __m128d test_mm_minmax_sd(__m128d __A, __m128d __B) { // CHECK-LABEL: 
@test_mm_minmax_sd( // CHECK: call <2 x double> @llvm.x86.avx10.mask.vminmaxsd.round( diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c index d06a008c09e71..936be27da61d7 100644 --- a/clang/test/CodeGen/X86/avx10_2ni-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c @@ -424,2408 +424,3 @@ __m256i test_mm256_maskz_dpwuuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, _ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpwuuds_epi32(__A, __B, __C, __D); } - -// YMM Rounding -__m256d test_mm256_add_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_add_round_pd -// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) - return _mm256_add_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_add_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_add_round_pd -// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_add_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_add_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_add_round_pd -// CHECK: @llvm.x86.avx10.vaddpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_add_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_add_round_ph(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_add_round_ph -// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11) - return _mm256_add_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_add_round_ph(__m256h __W, 
__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_add_round_ph -// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_add_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_add_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_add_round_ph -// CHECK: @llvm.x86.avx10.vaddph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_maskz_add_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_add_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_add_round_ps -// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) - return _mm256_add_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_add_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_add_round_ps -// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_add_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_add_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_add_round_ps -// CHECK: @llvm.x86.avx10.vaddps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_add_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__mmask8 test_mm256_cmp_round_pd_mask(__m256d a, __m256d b) { -// CHECK-LABEL: @test_mm256_cmp_round_pd_mask -// CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}} - return 
_mm256_cmp_round_pd_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); -} - -__mmask8 test_mm256_mask_cmp_round_pd_mask(__mmask8 m, __m256d a, __m256d b) { -// CHECK-LABEL: @test_mm256_mask_cmp_round_pd_mask -// CHECK: [[CMP:%.*]] = fcmp oeq <4 x double> %{{.*}}, %{{.*}} -// CHECK: and <4 x i1> [[CMP]], {{.*}} - return _mm256_mask_cmp_round_pd_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); -} - -__mmask16 test_mm256_cmp_round_ph_mask(__m256h a, __m256h b) { -// CHECK-LABEL: @test_mm256_cmp_round_ph_mask -// CHECK: fcmp oeq <16 x half> %{{.*}}, %{{.*}} - return _mm256_cmp_round_ph_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); -} - -__mmask16 test_mm256_mask_cmp_round_ph_mask(__mmask16 m, __m256h a, __m256h b) { -// CHECK-LABEL: @test_mm256_mask_cmp_round_ph_mask -// CHECK: [[CMP:%.*]] = fcmp oeq <16 x half> %{{.*}}, %{{.*}} -// CHECK: and <16 x i1> [[CMP]], {{.*}} - return _mm256_mask_cmp_round_ph_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); -} - -__mmask8 test_mm256_cmp_round_ps_mask(__m256 a, __m256 b) { -// CHECK-LABEL: @test_mm256_cmp_round_ps_mask -// CHECK: fcmp oeq <8 x float> %{{.*}}, %{{.*}} - return _mm256_cmp_round_ps_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); -} - -__mmask8 test_mm256_mask_cmp_round_ps_mask(__mmask8 m, __m256 a, __m256 b) { -// CHECK-LABEL: @test_mm256_mask_cmp_round_ps_mask -// CHECK: [[CMP:%.*]] = fcmp oeq <8 x float> %{{.*}}, %{{.*}} -// CHECK: and <8 x i1> [[CMP]], {{.*}} - return _mm256_mask_cmp_round_ps_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_cvt_roundepi32_ph(__m256i A) { -// CHECK-LABEL: test_mm256_cvt_roundepi32_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 11) - return _mm256_cvt_roundepi32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_mask_cvt_roundepi32_ph(__m128h A, __mmask8 B, __m256i C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundepi32_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 10) -// CHECK: select <8 x i1> 
%{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}} - return _mm256_mask_cvt_roundepi32_ph(A, B, C, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_maskz_cvt_roundepi32_ph(__mmask8 A, __m256i B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundepi32_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 9) -// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}} - return _mm256_maskz_cvt_roundepi32_ph(A, B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_cvt_roundepi32_ps(__m256i __A) -{ -// CHECK-LABEL: @test_mm256_cvt_roundepi32_ps -// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i32 - return _mm256_cvt_roundepi32_ps(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_cvt_roundepi32_ps(__m256 __W, __mmask8 __U, __m256i __A) -{ -// CHECK-LABEL: @test_mm256_mask_cvt_roundepi32_ps -// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i32 -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_cvt_roundepi32_ps(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_cvt_roundepi32_ps(__mmask8 __U, __m256i __A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvt_roundepi32_ps -// CHECK: @llvm.x86.avx512.sitofp.round.v8f32.v8i32 -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_cvt_roundepi32_ps(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_cvt_roundpd_epi32(__m256d A) -{ -// CHECK-LABEL: @test_mm256_cvt_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2dq256 - return _mm256_cvt_roundpd_epi32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_mask_cvt_roundpd_epi32(__m128i W,__mmask8 U,__m256d A) -{ -// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2dq256 - return _mm256_mask_cvt_roundpd_epi32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i 
test_mm256_maskz_cvt_roundpd_epi32(__mmask8 U, __m256d A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2dq256 - return _mm256_maskz_cvt_roundpd_epi32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_cvt_roundpd_ph(__m256d A) { -// CHECK-LABEL: test_mm256_cvt_roundpd_ph -// CHECK: @llvm.x86.avx10.mask.vcvtpd2ph256 - return _mm256_cvt_roundpd_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_mask_cvt_roundpd_ph(__m128h A, __mmask8 B, __m256d C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundpd_ph -// CHECK: @llvm.x86.avx10.mask.vcvtpd2ph256 - return _mm256_mask_cvt_roundpd_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_maskz_cvt_roundpd_ph(__mmask8 A, __m256d B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundpd_ph -// CHECK: @llvm.x86.avx10.mask.vcvtpd2ph256 - return _mm256_maskz_cvt_roundpd_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_cvt_roundpd_ps(__m256d A) -{ -// CHECK-LABEL: @test_mm256_cvt_roundpd_ps -// CHECK: @llvm.x86.avx10.mask.vcvtpd2ps256 - return _mm256_cvt_roundpd_ps(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_mask_cvt_roundpd_ps(__m128 W, __mmask8 U,__m256d A) -{ -// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_ps -// CHECK: @llvm.x86.avx10.mask.vcvtpd2ps256 - return _mm256_mask_cvt_roundpd_ps(W, U, A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_maskz_cvt_roundpd_ps(__mmask8 U, __m256d A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_ps -// CHECK: @llvm.x86.avx10.mask.vcvtpd2ps256 - return _mm256_maskz_cvt_roundpd_ps(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundpd_epi64(__m256d __A) { -// CHECK-LABEL: @test_mm256_cvt_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2qq256 - return _mm256_cvt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundpd_epi64(__m256i __W, 
__mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2qq256 - return _mm256_mask_cvt_roundpd_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundpd_epi64(__mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2qq256 - return _mm256_maskz_cvt_roundpd_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_cvt_roundpd_epu32(__m256d A) -{ -// CHECK-LABEL: @test_mm256_cvt_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2udq256 - return _mm256_cvt_roundpd_epu32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_mask_cvt_roundpd_epu32(__m128i W,__mmask8 U,__m256d A) -{ -// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2udq256 - return _mm256_mask_cvt_roundpd_epu32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_maskz_cvt_roundpd_epu32(__mmask8 U, __m256d A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2udq256 - return _mm256_maskz_cvt_roundpd_epu32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundpd_epu64(__m256d __A) { -// CHECK-LABEL: @test_mm256_cvt_roundpd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2uqq256 - return _mm256_cvt_roundpd_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundpd_epu64(__m256i __W, __mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_mask_cvt_roundpd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2uqq256 - return _mm256_mask_cvt_roundpd_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundpd_epu64(__mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_maskz_cvt_roundpd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtpd2uqq256 - return 
_mm256_maskz_cvt_roundpd_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundph_epi32(__m128h A) { -// CHECK-LABEL: test_mm256_cvt_roundph_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtph2dq256 - return _mm256_cvt_roundph_epi32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundph_epi32(__m256i A, __mmask16 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundph_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtph2dq256 - return _mm256_mask_cvt_roundph_epi32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundph_epi32(__mmask16 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtph2dq256 - return _mm256_maskz_cvt_roundph_epi32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_cvt_roundph_pd(__m128h A) { -// CHECK-LABEL: test_mm256_cvt_roundph_pd -// CHECK: @llvm.x86.avx10.mask.vcvtph2pd256 - return _mm256_cvt_roundph_pd(A, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_cvt_roundph_pd(__m256d A, __mmask8 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundph_pd -// CHECK: @llvm.x86.avx10.mask.vcvtph2pd256 - return _mm256_mask_cvt_roundph_pd(A, B, C, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_cvt_roundph_pd(__mmask8 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundph_pd -// CHECK: @llvm.x86.avx10.mask.vcvtph2pd256 - return _mm256_maskz_cvt_roundph_pd(A, B, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_cvtx_roundph_ps(__m128h A) { -// CHECK-LABEL: test_mm256_cvtx_roundph_ps -// CHECK: @llvm.x86.avx10.mask.vcvtph2psx256 - return _mm256_cvtx_roundph_ps(A, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_cvtx_roundph_ps(__m256 A, __mmask16 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvtx_roundph_ps -// CHECK: @llvm.x86.avx10.mask.vcvtph2psx256 - return _mm256_mask_cvtx_roundph_ps(A, B, C, _MM_FROUND_NO_EXC); -} - -__m256 
test_mm256_maskz_cvtx_roundph_ps(__mmask16 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvtx_roundph_ps -// CHECK: @llvm.x86.avx10.mask.vcvtph2psx256 - return _mm256_maskz_cvtx_roundph_ps(A, B, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundph_epi64(__m128h A) { -// CHECK-LABEL: test_mm256_cvt_roundph_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtph2qq256 - return _mm256_cvt_roundph_epi64(A, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundph_epi64(__m256i A, __mmask8 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundph_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtph2qq256 - return _mm256_mask_cvt_roundph_epi64(A, B, C, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundph_epi64(__mmask8 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtph2qq256 - return _mm256_maskz_cvt_roundph_epi64(A, B, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundph_epu32(__m128h A) { -// CHECK-LABEL: test_mm256_cvt_roundph_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtph2udq256 - return _mm256_cvt_roundph_epu32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundph_epu32(__m256i A, __mmask16 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundph_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtph2udq256 - return _mm256_mask_cvt_roundph_epu32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundph_epu32(__mmask16 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtph2udq256 - return _mm256_maskz_cvt_roundph_epu32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundph_epu64(__m128h A) { -// CHECK-LABEL: test_mm256_cvt_roundph_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtph2uqq256 - return _mm256_cvt_roundph_epu64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundph_epu64(__m256i A, __mmask8 B, __m128h C) { -// CHECK-LABEL: 
test_mm256_mask_cvt_roundph_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtph2uqq256 - return _mm256_mask_cvt_roundph_epu64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundph_epu64(__mmask8 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtph2uqq256 - return _mm256_maskz_cvt_roundph_epu64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundph_epu16(__m256h A) { -// CHECK-LABEL: test_mm256_cvt_roundph_epu16 -// CHECK: @llvm.x86.avx10.mask.vcvtph2uw256 - return _mm256_cvt_roundph_epu16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundph_epu16(__m256i A, __mmask32 B, __m256h C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundph_epu16 -// CHECK: @llvm.x86.avx10.mask.vcvtph2uw256 - return _mm256_mask_cvt_roundph_epu16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundph_epu16(__mmask32 A, __m256h B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epu16 -// CHECK: @llvm.x86.avx10.mask.vcvtph2uw256 - return _mm256_maskz_cvt_roundph_epu16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundph_epi16(__m256h A) { -// CHECK-LABEL: test_mm256_cvt_roundph_epi16 -// CHECK: @llvm.x86.avx10.mask.vcvtph2w256 - return _mm256_cvt_roundph_epi16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundph_epi16(__m256i A, __mmask32 B, __m256h C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundph_epi16 -// CHECK: @llvm.x86.avx10.mask.vcvtph2w256 - return _mm256_mask_cvt_roundph_epi16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundph_epi16(__mmask32 A, __m256h B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundph_epi16 -// CHECK: @llvm.x86.avx10.mask.vcvtph2w256 - return _mm256_maskz_cvt_roundph_epi16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundps_epi32(__m256 __A) -{ -// 
CHECK-LABEL: @test_mm256_cvt_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtps2dq256 - return _mm256_cvt_roundps_epi32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundps_epi32(__m256i __W,__mmask16 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtps2dq256 - return _mm256_mask_cvt_roundps_epi32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundps_epi32(__mmask16 __U, __m256 __A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvtps2dq256 - return _mm256_maskz_cvt_roundps_epi32(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_cvt_roundps_pd(__m128 __A) { -// CHECK-LABEL: @test_mm256_cvt_roundps_pd -// CHECK: @llvm.x86.avx10.mask.vcvtps2pd256 - return _mm256_cvt_roundps_pd(__A, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_cvt_roundps_pd(__m256d __W, __mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_mask_cvt_roundps_pd -// CHECK: @llvm.x86.avx10.mask.vcvtps2pd256 - return _mm256_mask_cvt_roundps_pd(__W, __U, __A, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_cvt_roundps_pd(__mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_pd -// CHECK: @llvm.x86.avx10.mask.vcvtps2pd256 - return _mm256_maskz_cvt_roundps_pd(__U, __A, _MM_FROUND_NO_EXC); -} - -// FIXME: We may change to @llvm.x86.avx10.mask.vcvtps2ph256 in future. 
-__m128i test_mm256_cvt_roundps_ph(__m256 __A) -{ - // CHECK-LABEL: @test_mm256_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_cvt_roundps_ph(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_mask_cvt_roundps_ph(__m128i __W , __mmask16 __U, __m256 __A) -{ - // CHECK-LABEL: @test_mm256_mask_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_maskz_cvt_roundps_ph(__mmask16 __U, __m256 __A) -{ - // CHECK-LABEL: @test_mm256_maskz_cvt_roundps_ph - // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_cvtx_roundps_ph(__m256 A) { -// CHECK-LABEL: test_mm256_cvtx_roundps_ph -// CHECK: @llvm.x86.avx10.mask.vcvtps2phx256 - return _mm256_cvtx_roundps_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_mask_cvtx_roundps_ph(__m128h A, __mmask16 B, __m256 C) { -// CHECK-LABEL: test_mm256_mask_cvtx_roundps_ph -// CHECK: @llvm.x86.avx10.mask.vcvtps2phx256 - return _mm256_mask_cvtx_roundps_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_maskz_cvtx_roundps_ph(__mmask16 A, __m256 B) { -// CHECK-LABEL: test_mm256_maskz_cvtx_roundps_ph -// CHECK: @llvm.x86.avx10.mask.vcvtps2phx256 - return _mm256_maskz_cvtx_roundps_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundps_epi64(__m128 __A) { -// CHECK-LABEL: @test_mm256_cvt_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtps2qq256 - return _mm256_cvt_roundps_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundps_epi64(__m256i __W, __mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtps2qq256 - return _mm256_mask_cvt_roundps_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT 
| _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundps_epi64(__mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvtps2qq256 - return _mm256_maskz_cvt_roundps_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundps_epu32(__m256 __A) -{ -// CHECK-LABEL: @test_mm256_cvt_roundps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtps2udq256 - return _mm256_cvt_roundps_epu32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundps_epu32(__m256i __W,__mmask16 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtps2udq256 - return _mm256_mask_cvt_roundps_epu32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundps_epu32(__mmask16 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvtps2udq256 - return _mm256_maskz_cvt_roundps_epu32(__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvt_roundps_epu64(__m128 __A) { -// CHECK-LABEL: @test_mm256_cvt_roundps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtps2uqq256 - return _mm256_cvt_roundps_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvt_roundps_epu64(__m256i __W, __mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_mask_cvt_roundps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtps2uqq256 - return _mm256_mask_cvt_roundps_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvt_roundps_epu64(__mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_maskz_cvt_roundps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvtps2uqq256 - return _mm256_maskz_cvt_roundps_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256d test__mm256_cvt_roundepi64_pd(__m256i __A) { -// CHECK-LABEL: @test__mm256_cvt_roundepi64_pd -// CHECK: 
@llvm.x86.avx512.sitofp.round.v4f64.v4i64 - return _mm256_cvt_roundepi64_pd(__A, _MM_FROUND_NO_EXC); -} - -__m256d test__mm256_mask_cvt_roundepi64_pd(__m256d __W, __mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test__mm256_mask_cvt_roundepi64_pd -// CHECK: @llvm.x86.avx512.sitofp.round.v4f64.v4i64 - return _mm256_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_NO_EXC); -} - -__m256d test__mm256_maskz_cvt_roundepi64_pd(__mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test__mm256_maskz_cvt_roundepi64_pd -// CHECK: @llvm.x86.avx512.sitofp.round.v4f64.v4i64 - return _mm256_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_NO_EXC); -} - -// FIXME: We may change to @llvm.x86.avx10.mask.vcvtqq2ph256 in future. -__m128h test_mm256_cvt_roundepi64_ph(__m256i A) { -// CHECK-LABEL: test_mm256_cvt_roundepi64_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v4i64 - return _mm256_cvt_roundepi64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_mask_cvt_roundepi64_ph(__m128h A, __mmask8 B, __m256i C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundepi64_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v4i64 - return _mm256_mask_cvt_roundepi64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_maskz_cvt_roundepi64_ph(__mmask8 A, __m256i B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundepi64_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v8f16.v4i64 - return _mm256_maskz_cvt_roundepi64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_cvt_roundepi64_ps(__m256i __A) { -// CHECK-LABEL: @test_mm256_cvt_roundepi64_ps -// CHECK: @llvm.x86.avx512.sitofp.round.v4f32.v4i64 - return _mm256_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_mask_cvt_roundepi64_ps(__m128 __W, __mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test_mm256_mask_cvt_roundepi64_ps -// CHECK: @llvm.x86.avx512.sitofp.round.v4f32.v4i64 -// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return 
_mm256_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_maskz_cvt_roundepi64_ps(__mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test_mm256_maskz_cvt_roundepi64_ps -// CHECK: @llvm.x86.avx512.sitofp.round.v4f32.v4i64 -// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm256_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_cvtt_roundpd_epi32(__m256d A) -{ -// CHECK-LABEL: @test_mm256_cvtt_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2dq256 - return _mm256_cvtt_roundpd_epi32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_mask_cvtt_roundpd_epi32(__m128i W,__mmask8 U,__m256d A) -{ -// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2dq256 - return _mm256_mask_cvtt_roundpd_epi32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_maskz_cvtt_roundpd_epi32(__mmask8 U, __m256d A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2dq256 - return _mm256_maskz_cvtt_roundpd_epi32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundpd_epi64(__m256d __A) { -// CHECK-LABEL: @test_mm256_cvtt_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2qq256 - return _mm256_cvtt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundpd_epi64(__m256i __W, __mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2qq256 - return _mm256_mask_cvtt_roundpd_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundpd_epi64(__mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2qq256 - return _mm256_maskz_cvtt_roundpd_epi64(__U, __A, 
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_cvtt_roundpd_epu32(__m256d A) -{ -// CHECK-LABEL: @test_mm256_cvtt_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2udq256 - return _mm256_cvtt_roundpd_epu32(A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_mask_cvtt_roundpd_epu32(__m128i W,__mmask8 U,__m256d A) -{ -// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2udq256 - return _mm256_mask_cvtt_roundpd_epu32(W, U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm256_maskz_cvtt_roundpd_epu32(__mmask8 U, __m256d A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2udq256 - return _mm256_maskz_cvtt_roundpd_epu32(U, A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundpd_epu64(__m256d __A) { -// CHECK-LABEL: @test_mm256_cvtt_roundpd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqq256 - return _mm256_cvtt_roundpd_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundpd_epu64(__m256i __W, __mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_mask_cvtt_roundpd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqq256 - return _mm256_mask_cvtt_roundpd_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundpd_epu64(__mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundpd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqq256 - return _mm256_maskz_cvtt_roundpd_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundph_epi32(__m128h A) { -// CHECK-LABEL: test_mm256_cvtt_roundph_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttph2dq256 - return _mm256_cvtt_roundph_epi32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundph_epi32(__m256i A, __mmask16 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epi32 
-// CHECK: @llvm.x86.avx10.mask.vcvttph2dq256 - return _mm256_mask_cvtt_roundph_epi32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundph_epi32(__mmask16 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttph2dq256 - return _mm256_maskz_cvtt_roundph_epi32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundph_epi64(__m128h A) { -// CHECK-LABEL: test_mm256_cvtt_roundph_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttph2qq256 - return _mm256_cvtt_roundph_epi64(A, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundph_epi64(__m256i A, __mmask8 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttph2qq256 - return _mm256_mask_cvtt_roundph_epi64(A, B, C, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundph_epi64(__mmask8 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttph2qq256 - return _mm256_maskz_cvtt_roundph_epi64(A, B, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundph_epu32(__m128h A) { -// CHECK-LABEL: test_mm256_cvtt_roundph_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttph2udq256 - return _mm256_cvtt_roundph_epu32(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundph_epu32(__m256i A, __mmask16 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttph2udq256 - return _mm256_mask_cvtt_roundph_epu32(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundph_epu32(__mmask16 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttph2udq256 - return _mm256_maskz_cvtt_roundph_epu32(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundph_epu64(__m128h A) { -// CHECK-LABEL: test_mm256_cvtt_roundph_epu64 -// CHECK: 
@llvm.x86.avx10.mask.vcvttph2uqq256 - return _mm256_cvtt_roundph_epu64(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundph_epu64(__m256i A, __mmask8 B, __m128h C) { -// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttph2uqq256 - return _mm256_mask_cvtt_roundph_epu64(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundph_epu64(__mmask8 A, __m128h B) { -// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttph2uqq256 - return _mm256_maskz_cvtt_roundph_epu64(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundph_epu16(__m256h A) { -// CHECK-LABEL: test_mm256_cvtt_roundph_epu16 -// CHECK: @llvm.x86.avx10.mask.vcvttph2uw256 - return _mm256_cvtt_roundph_epu16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundph_epu16(__m256i A, __mmask32 B, __m256h C) { -// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epu16 -// CHECK: @llvm.x86.avx10.mask.vcvttph2uw256 - return _mm256_mask_cvtt_roundph_epu16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundph_epu16(__mmask32 A, __m256h B) { -// CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epu16 -// CHECK: @llvm.x86.avx10.mask.vcvttph2uw256 - return _mm256_maskz_cvtt_roundph_epu16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundph_epi16(__m256h A) { -// CHECK-LABEL: test_mm256_cvtt_roundph_epi16 -// CHECK: @llvm.x86.avx10.mask.vcvttph2w256 - return _mm256_cvtt_roundph_epi16(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundph_epi16(__m256i A, __mmask32 B, __m256h C) { -// CHECK-LABEL: test_mm256_mask_cvtt_roundph_epi16 -// CHECK: @llvm.x86.avx10.mask.vcvttph2w256 - return _mm256_mask_cvtt_roundph_epi16(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundph_epi16(__mmask32 A, __m256h B) { -// 
CHECK-LABEL: test_mm256_maskz_cvtt_roundph_epi16 -// CHECK: @llvm.x86.avx10.mask.vcvttph2w256 - return _mm256_maskz_cvtt_roundph_epi16(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundps_epi32(__m256 __A) -{ -// CHECK-LABEL: @test_mm256_cvtt_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2dq256 - return _mm256_cvtt_roundps_epi32(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundps_epi32(__m256i __W,__mmask16 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2dq256 - return _mm256_mask_cvtt_roundps_epi32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundps_epi32(__mmask16 __U, __m256 __A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2dq256 - return _mm256_maskz_cvtt_roundps_epi32(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundps_epi64(__m128 __A) { -// CHECK-LABEL: @test_mm256_cvtt_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2qq256 - return _mm256_cvtt_roundps_epi64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundps_epi64(__m256i __W, __mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2qq256 - return _mm256_mask_cvtt_roundps_epi64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundps_epi64(__mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2qq256 - return _mm256_maskz_cvtt_roundps_epi64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundps_epu32(__m256 __A) -{ -// CHECK-LABEL: @test_mm256_cvtt_roundps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2udq256 - return _mm256_cvtt_roundps_epu32(__A,_MM_FROUND_TO_ZERO | 
_MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundps_epu32(__m256i __W,__mmask16 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2udq256 - return _mm256_mask_cvtt_roundps_epu32(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundps_epu32(__mmask16 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2udq256 - return _mm256_maskz_cvtt_roundps_epu32(__U,__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_cvtt_roundps_epu64(__m128 __A) { -// CHECK-LABEL: @test_mm256_cvtt_roundps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2uqq256 - return _mm256_cvtt_roundps_epu64(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_cvtt_roundps_epu64(__m256i __W, __mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_mask_cvtt_roundps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2uqq256 - return _mm256_mask_cvtt_roundps_epu64(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_cvtt_roundps_epu64(__mmask8 __U, __m128 __A) { -// CHECK-LABEL: @test_mm256_maskz_cvtt_roundps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2uqq256 - return _mm256_maskz_cvtt_roundps_epu64(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_cvt_roundepu32_ph(__m256i A) { -// CHECK-LABEL: test_mm256_cvt_roundepu32_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 11) - return _mm256_cvt_roundepu32_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_mask_cvt_roundepu32_ph(__m128h A, __mmask8 B, __m256i C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundepu32_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 10) -// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}} - return _mm256_mask_cvt_roundepu32_ph(A, B, C, 
_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_maskz_cvt_roundepu32_ph(__mmask8 A, __m256i B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundepu32_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v8i32(<8 x i32> %{{.*}}, i32 9) -// CHECK: select <8 x i1> %{{.*}}, <8 x half> %{{.*}}, <8 x half> %{{.*}} - return _mm256_maskz_cvt_roundepu32_ph(A, B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_cvt_roundepu32_ps(__m256i __A) -{ -// CHECK-LABEL: @test_mm256_cvt_roundepu32_ps -// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i32 - return _mm256_cvt_roundepu32_ps(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_cvt_roundepu32_ps(__m256 __W, __mmask8 __U, __m256i __A) -{ -// CHECK-LABEL: @test_mm256_mask_cvt_roundepu32_ps -// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i32 -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_cvt_roundepu32_ps(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_cvt_roundepu32_ps(__mmask8 __U, __m256i __A) -{ -// CHECK-LABEL: @test_mm256_maskz_cvt_roundepu32_ps -// CHECK: @llvm.x86.avx512.uitofp.round.v8f32.v8i32 -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_cvt_roundepu32_ps(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test__mm256_cvt_roundepu64_pd(__m256i __A) { -// CHECK-LABEL: @test__mm256_cvt_roundepu64_pd -// CHECK: @llvm.x86.avx512.uitofp.round.v4f64.v4i64 - return _mm256_cvt_roundepu64_pd(__A, _MM_FROUND_NO_EXC); -} - -__m256d test__mm256_mask_cvt_roundepu64_pd(__m256d __W, __mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test__mm256_mask_cvt_roundepu64_pd -// CHECK: @llvm.x86.avx512.uitofp.round.v4f64.v4i64 - return _mm256_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_NO_EXC); -} - -__m256d test__mm256_maskz_cvt_roundepu64_pd(__mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test__mm256_maskz_cvt_roundepu64_pd -// 
CHECK: @llvm.x86.avx512.uitofp.round.v4f64.v4i64 - return _mm256_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_NO_EXC); -} - -// FIXME: We may change to @llvm.x86.avx10.mask.vcvtuqq2ph256 in future. -__m128h test_mm256_cvt_roundepu64_ph(__m256i A) { -// CHECK-LABEL: test_mm256_cvt_roundepu64_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v4i64 - return _mm256_cvt_roundepu64_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_mask_cvt_roundepu64_ph(__m128h A, __mmask8 B, __m256i C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundepu64_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v4i64 - return _mm256_mask_cvt_roundepu64_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128h test_mm256_maskz_cvt_roundepu64_ph(__mmask8 A, __m256i B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundepu64_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v8f16.v4i64 - return _mm256_maskz_cvt_roundepu64_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_cvt_roundepu64_ps(__m256i __A) { -// CHECK-LABEL: @test_mm256_cvt_roundepu64_ps -// CHECK: @llvm.x86.avx512.uitofp.round.v4f32.v4i64 - return _mm256_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_mask_cvt_roundepu64_ps(__m128 __W, __mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test_mm256_mask_cvt_roundepu64_ps -// CHECK: @llvm.x86.avx512.uitofp.round.v4f32.v4i64 -// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm256_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m128 test_mm256_maskz_cvt_roundepu64_ps(__mmask8 __U, __m256i __A) { -// CHECK-LABEL: @test_mm256_maskz_cvt_roundepu64_ps -// CHECK: @llvm.x86.avx512.uitofp.round.v4f32.v4i64 -// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - return _mm256_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_cvt_roundepi16_ph(__m256i A) { 
-// CHECK-LABEL: test_mm256_cvt_roundepi16_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i16 - return _mm256_cvt_roundepi16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_cvt_roundepi16_ph(__m256h A, __mmask16 B, __m256i C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundepi16_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i16 - return _mm256_mask_cvt_roundepi16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_cvt_roundepi16_ph(__mmask16 A, __m256i B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundepi16_ph -// CHECK: @llvm.x86.avx512.sitofp.round.v16f16.v16i16 - return _mm256_maskz_cvt_roundepi16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_cvt_roundepu16_ph(__m256i A) { -// CHECK-LABEL: test_mm256_cvt_roundepu16_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i16 - return _mm256_cvt_roundepu16_ph(A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_cvt_roundepu16_ph(__m256h A, __mmask16 B, __m256i C) { -// CHECK-LABEL: test_mm256_mask_cvt_roundepu16_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i16 - return _mm256_mask_cvt_roundepu16_ph(A, B, C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_cvt_roundepu16_ph(__mmask16 A, __m256i B) { -// CHECK-LABEL: test_mm256_maskz_cvt_roundepu16_ph -// CHECK: @llvm.x86.avx512.uitofp.round.v16f16.v16i16 - return _mm256_maskz_cvt_roundepu16_ph(A, B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_div_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_div_round_pd -// CHECK: @llvm.x86.avx10.vdivpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) - return _mm256_div_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_div_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_div_round_pd -// CHECK: @llvm.x86.avx10.vdivpd256(<4 x double> %{{.*}}, <4 x 
double> %{{.*}}, i32 10) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_div_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_div_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_div_round_pd -// CHECK: @llvm.x86.avx10.vdivpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_div_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_div_round_ph(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_div_round_ph -// CHECK: @llvm.x86.avx10.vdivph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11) - return _mm256_div_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_div_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_div_round_ph -// CHECK: @llvm.x86.avx10.vdivph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_div_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_div_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_div_round_ph -// CHECK: @llvm.x86.avx10.vdivph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_maskz_div_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_div_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_div_round_ps -// CHECK: @llvm.x86.avx10.vdivps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) - return _mm256_div_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_div_round_ps(__m256 __W, 
__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_div_round_ps -// CHECK: @llvm.x86.avx10.vdivps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_div_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_div_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_div_round_ps -// CHECK: @llvm.x86.avx10.vdivps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_div_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fcmadd_round_pch(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fcmadd_round_pch -// CHECK: @llvm.x86.avx10.mask.vfcmaddcph256 - return _mm256_fcmadd_round_pch(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fcmadd_round_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fcmadd_round_pch -// CHECK: @llvm.x86.avx10.mask.vfcmaddcph256 -// CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fcmadd_round_pch(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask3_fcmadd_round_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fcmadd_round_pch -// CHECK: @llvm.x86.avx10.mask.vfcmaddcph256 -// CHECK-NOT: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fcmadd_round_pch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fcmadd_round_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fcmadd_round_pch -// CHECK: @llvm.x86.avx10.maskz.vfcmaddcph256 - return 
_mm256_maskz_fcmadd_round_pch(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_cmul_round_pch(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_cmul_round_pch -// CHECK: @llvm.x86.avx10.mask.vfcmulcph256 - return _mm256_cmul_round_pch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_cmul_round_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_cmul_round_pch -// CHECK: @llvm.x86.avx10.mask.vfcmulcph256 - return _mm256_mask_cmul_round_pch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_cmul_round_pch(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_cmul_round_pch -// CHECK: @llvm.x86.avx10.mask.vfcmulcph256 - return _mm256_maskz_cmul_round_pch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_fixupimm_round_pd(__m256d __A, __m256d __B, __m256i __C) { -// CHECK-LABEL: @test_mm256_fixupimm_round_pd -// CHECK: @llvm.x86.avx10.mask.vfixupimmpd256 - return _mm256_fixupimm_round_pd(__A, __B, __C, 5, 8); -} - -__m256d test_mm256_mask_fixupimm_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256i __C) { -// CHECK-LABEL: @test_mm256_mask_fixupimm_round_pd -// CHECK: @llvm.x86.avx10.mask.vfixupimmpd256 - return _mm256_mask_fixupimm_round_pd(__A, __U, __B, __C, 5, 8); -} - -__m256d test_mm256_maskz_fixupimm_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256i __C) { -// CHECK-LABEL: @test_mm256_maskz_fixupimm_round_pd -// CHECK: @llvm.x86.avx10.maskz.vfixupimmpd256 - return _mm256_maskz_fixupimm_round_pd(__U, __A, __B, __C, 5, 8); -} - -__m256 test_mm256_fixupimm_round_ps(__m256 __A, __m256 __B, __m256i __C) { -// CHECK-LABEL: @test_mm256_fixupimm_round_ps -// CHECK: @llvm.x86.avx10.mask.vfixupimmps256 - return _mm256_fixupimm_round_ps(__A, __B, __C, 5, 8); -} - -__m256 test_mm256_mask_fixupimm_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256i __C) 
{ -// CHECK-LABEL: @test_mm256_mask_fixupimm_round_ps -// CHECK: @llvm.x86.avx10.mask.vfixupimmps256 - return _mm256_mask_fixupimm_round_ps(__A, __U, __B, __C, 5, 8); -} - -__m256 test_mm256_maskz_fixupimm_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256i __C) { -// CHECK-LABEL: @test_mm256_maskz_fixupimm_round_ps -// CHECK: @llvm.x86.avx10.maskz.vfixupimmps256 - return _mm256_maskz_fixupimm_round_ps(__U, __A, __B, __C, 5, 8); -} - -__m256d test_mm256_fmadd_round_pd(__m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_fmadd_round_pd -// CHECK: @llvm.x86.avx10.vfmaddpd256 - return _mm256_fmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_fmadd_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_mask_fmadd_round_pd -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask3_fmadd_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmadd_round_pd -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_fmadd_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_maskz_fmadd_round_pd -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer - return _mm256_maskz_fmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_fmsub_round_pd(__m256d __A, __m256d __B, __m256d __C) { 
-// CHECK-LABEL: @test_mm256_fmsub_round_pd -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 - return _mm256_fmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_fmsub_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_mask_fmsub_round_pd -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_fmsub_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_maskz_fmsub_round_pd -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer - return _mm256_maskz_fmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_fnmadd_round_pd(__m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_fnmadd_round_pd -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 - return _mm256_fnmadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask3_fnmadd_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fnmadd_round_pd -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fnmadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_fnmadd_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_maskz_fnmadd_round_pd -// CHECK: fneg <4 x double> -// CHECK: 
@llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer - return _mm256_maskz_fnmadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_fnmsub_round_pd(__m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_fnmsub_round_pd -// CHECK: fneg <4 x double> -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 - return _mm256_fnmsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_fnmsub_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_maskz_fnmsub_round_pd -// CHECK: fneg <4 x double> -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer - return _mm256_maskz_fnmsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fmadd_round_ph(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fmadd_round_ph -// CHECK: @llvm.x86.avx10.vfmaddph256 - return _mm256_fmadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fmadd_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fmadd_round_ph -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_fmadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask3_fmadd_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmadd_round_ph -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, 
<16 x half> %{{.*}} - return _mm256_mask3_fmadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fmadd_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fmadd_round_ph -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer - return _mm256_maskz_fmadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fmsub_round_ph(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fmsub_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 - return _mm256_fmsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fmsub_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fmsub_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_fmsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fmsub_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fmsub_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer - return _mm256_maskz_fmsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fnmadd_round_ph(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fnmadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 - return _mm256_fnmadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask3_fnmadd_round_ph(__m256h __A, 
__m256h __B, __m256h __C, __mmask16 __U) { -// CHECK-LABEL: @test_mm256_mask3_fnmadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask3_fnmadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fnmadd_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fnmadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer - return _mm256_maskz_fnmadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fnmsub_round_ph(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fnmsub_round_ph -// CHECK: fneg -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 - return _mm256_fnmsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fnmsub_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fnmsub_round_ph -// CHECK: fneg -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer - return _mm256_maskz_fnmsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_fmadd_round_ps(__m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_fmadd_round_ps -// CHECK: @llvm.x86.avx10.vfmaddps256 - return _mm256_fmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_fmadd_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_mask_fmadd_round_ps -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast 
i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask3_fmadd_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmadd_round_ps -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_fmadd_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_maskz_fmadd_round_ps -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer - return _mm256_maskz_fmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_fmsub_round_ps(__m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_fmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 - return _mm256_fmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_fmsub_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_mask_fmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_fmsub_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_maskz_fmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// 
CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer - return _mm256_maskz_fmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_fnmadd_round_ps(__m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_fnmadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 - return _mm256_fnmadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask3_fnmadd_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fnmadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fnmadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_fnmadd_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_maskz_fnmadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer - return _mm256_maskz_fnmadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_fnmsub_round_ps(__m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_fnmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 - return _mm256_fnmsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_fnmsub_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_maskz_fnmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select 
<8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer - return _mm256_maskz_fnmsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fmadd_round_pch(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fmadd_round_pch -// CHECK: @llvm.x86.avx10.mask.vfmaddcph256 - return _mm256_fmadd_round_pch(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fmadd_round_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fmadd_round_pch -// CHECK: @llvm.x86.avx10.mask.vfmaddcph256 -// CHECK: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fmadd_round_pch(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask3_fmadd_round_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmadd_round_pch -// CHECK: @llvm.x86.avx10.mask.vfmaddcph256 -// CHECK-NOT: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fmadd_round_pch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fmadd_round_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fmadd_round_pch -// CHECK: @llvm.x86.avx10.maskz.vfmaddcph256 - return _mm256_maskz_fmadd_round_pch(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_fmaddsub_round_pd(__m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_fmaddsub_round_pd -// CHECK: @llvm.x86.avx10.vfmaddsubpd256 - return _mm256_fmaddsub_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_fmaddsub_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_mask_fmaddsub_round_pd -// CHECK: @llvm.x86.avx10.vfmaddsubpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// 
CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fmaddsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask3_fmaddsub_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmaddsub_round_pd -// CHECK: @llvm.x86.avx10.vfmaddsubpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fmaddsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_fmaddsub_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_maskz_fmaddsub_round_pd -// CHECK: @llvm.x86.avx10.vfmaddsubpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer - return _mm256_maskz_fmaddsub_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_fmsubadd_round_pd(__m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_fmsubadd_round_pd -// CHECK: fneg <4 x double> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddsubpd256 - return _mm256_fmsubadd_round_pd(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_fmsubadd_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_mask_fmsubadd_round_pd -// CHECK: fneg <4 x double> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddsubpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fmsubadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_fmsubadd_round_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_maskz_fmsubadd_round_pd -// CHECK: fneg <4 x double> %{{.*}} -// CHECK: 
@llvm.x86.avx10.vfmaddsubpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> zeroinitializer - return _mm256_maskz_fmsubadd_round_pd(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fmaddsub_round_ph(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fmaddsub_round_ph -// CHECK: @llvm.x86.avx10.vfmaddsubph256 - return _mm256_fmaddsub_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fmaddsub_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fmaddsub_round_ph -// CHECK: @llvm.x86.avx10.vfmaddsubph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_fmaddsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask3_fmaddsub_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmaddsub_round_ph -// CHECK: @llvm.x86.avx10.vfmaddsubph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask3_fmaddsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fmaddsub_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fmaddsub_round_ph -// CHECK: @llvm.x86.avx10.vfmaddsubph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer - return _mm256_maskz_fmaddsub_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_fmsubadd_round_ph(__m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_fmsubadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddsubph256 - return 
_mm256_fmsubadd_round_ph(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fmsubadd_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fmsubadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddsubph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_fmsubadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_fmsubadd_round_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_maskz_fmsubadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddsubph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> zeroinitializer - return _mm256_maskz_fmsubadd_round_ph(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_fmaddsub_round_ps(__m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_fmaddsub_round_ps -// CHECK: @llvm.x86.avx10.vfmaddsubps256 - return _mm256_fmaddsub_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_fmaddsub_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_mask_fmaddsub_round_ps -// CHECK: @llvm.x86.avx10.vfmaddsubps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fmaddsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask3_fmaddsub_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmaddsub_round_ps -// CHECK: @llvm.x86.avx10.vfmaddsubps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fmaddsub_round_ps(__A, __B, 
__C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_fmaddsub_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_maskz_fmaddsub_round_ps -// CHECK: @llvm.x86.avx10.vfmaddsubps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer - return _mm256_maskz_fmaddsub_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_fmsubadd_round_ps(__m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_fmsubadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddsubps256 - return _mm256_fmsubadd_round_ps(__A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_fmsubadd_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_mask_fmsubadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddsubps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fmsubadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_fmsubadd_round_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_maskz_fmsubadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddsubps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> zeroinitializer - return _mm256_maskz_fmsubadd_round_ps(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask3_fmsub_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmsub_round_pd -// CHECK: fneg <4 x double> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> 
%{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask3_fmsubadd_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmsubadd_round_pd -// CHECK: fneg <4 x double> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddsubpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fmsubadd_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_fnmadd_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_mask_fnmadd_round_pd -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fnmadd_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_fnmsub_round_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { -// CHECK-LABEL: @test_mm256_mask_fnmsub_round_pd -// CHECK: fneg <4 x double> -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_fnmsub_round_pd(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask3_fnmsub_round_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fnmsub_round_pd -// CHECK: fneg <4 x double> -// CHECK: fneg <4 x double> -// CHECK: @llvm.x86.avx10.vfmaddpd256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask3_fnmsub_round_pd(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h 
test_mm256_mask3_fmsub_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmsub_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask3_fmsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask3_fmsubadd_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmsubadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddsubph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask3_fmsubadd_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fnmadd_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fnmadd_round_ph -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_fnmadd_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_fnmsub_round_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_fnmsub_round_ph -// CHECK: fneg -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_fnmsub_round_ph(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask3_fnmsub_round_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { -// CHECK-LABEL: @test_mm256_mask3_fnmsub_round_ph -// CHECK: fneg -// CHECK: fneg -// CHECK: @llvm.x86.avx10.vfmaddph256 -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// 
CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask3_fnmsub_round_ph(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask3_fmsub_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask3_fmsubadd_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fmsubadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddsubps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fmsubadd_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_fnmadd_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_mask_fnmadd_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fnmadd_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_fnmsub_round_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { -// CHECK-LABEL: @test_mm256_mask_fnmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_fnmsub_round_ps(__A, __U, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 
test_mm256_mask3_fnmsub_round_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_mask3_fnmsub_round_ps -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: fneg <8 x float> %{{.*}} -// CHECK: @llvm.x86.avx10.vfmaddps256 -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask3_fnmsub_round_ps(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mul_round_pch(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mul_round_pch -// CHECK: @llvm.x86.avx10.mask.vfmulcph256 - return _mm256_mul_round_pch(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_mul_round_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_mul_round_pch -// CHECK: @llvm.x86.avx10.mask.vfmulcph256 - return _mm256_mask_mul_round_pch(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_mul_round_pch(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_mul_round_pch -// CHECK: @llvm.x86.avx10.mask.vfmulcph256 - return _mm256_maskz_mul_round_pch(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_getexp_round_pd(__m256d __A) { -// CHECK-LABEL: @test_mm256_getexp_round_pd -// CHECK: @llvm.x86.avx10.mask.vgetexppd256 - return _mm256_getexp_round_pd(__A, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_getexp_round_pd(__m256d __W, __mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_mask_getexp_round_pd -// CHECK: @llvm.x86.avx10.mask.vgetexppd256 - return _mm256_mask_getexp_round_pd(__W, __U, __A, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_getexp_round_pd(__mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_maskz_getexp_round_pd -// CHECK: @llvm.x86.avx10.mask.vgetexppd256 - return _mm256_maskz_getexp_round_pd(__U, __A, _MM_FROUND_NO_EXC); -} - -__m256h 
test_mm256_getexp_round_ph(__m256h __A) { -// CHECK-LABEL: @test_mm256_getexp_round_ph -// CHECK: @llvm.x86.avx10.mask.vgetexpph256 - return _mm256_getexp_round_ph(__A, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_getexp_round_ph(__m256h __W, __mmask16 __U, __m256h __A) { -// CHECK-LABEL: @test_mm256_mask_getexp_round_ph -// CHECK: @llvm.x86.avx10.mask.vgetexpph256 - return _mm256_mask_getexp_round_ph(__W, __U, __A, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_getexp_round_ph(__mmask16 __U, __m256h __A) { -// CHECK-LABEL: @test_mm256_maskz_getexp_round_ph -// CHECK: @llvm.x86.avx10.mask.vgetexpph256 - return _mm256_maskz_getexp_round_ph(__U, __A, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_getexp_round_ps(__m256 __A) { -// CHECK-LABEL: @test_mm256_getexp_round_ps -// CHECK: @llvm.x86.avx10.mask.vgetexpps256 - return _mm256_getexp_round_ps(__A, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_getexp_round_ps(__m256 __W, __mmask8 __U, __m256 __A) { -// CHECK-LABEL: @test_mm256_mask_getexp_round_ps -// CHECK: @llvm.x86.avx10.mask.vgetexpps256 - return _mm256_mask_getexp_round_ps(__W, __U, __A, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_getexp_round_ps(__mmask8 __U, __m256 __A) { -// CHECK-LABEL: @test_mm256_maskz_getexp_round_ps -// CHECK: @llvm.x86.avx10.mask.vgetexpps256 - return _mm256_maskz_getexp_round_ps(__U, __A, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_getmant_round_pd(__m256d __A) { -// CHECK-LABEL: @test_mm256_getmant_round_pd -// CHECK: @llvm.x86.avx10.mask.vgetmantpd256 - return _mm256_getmant_round_pd(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_getmant_round_pd(__m256d __W, __mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_mask_getmant_round_pd -// CHECK: @llvm.x86.avx10.mask.vgetmantpd256 - return _mm256_mask_getmant_round_pd(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_getmant_round_pd(__mmask8 __U, __m256d __A) { -// 
CHECK-LABEL: @test_mm256_maskz_getmant_round_pd -// CHECK: @llvm.x86.avx10.mask.vgetmantpd256 - return _mm256_maskz_getmant_round_pd(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_getmant_round_ph(__m256h __A) { -// CHECK-LABEL: @test_mm256_getmant_round_ph -// CHECK: @llvm.x86.avx10.mask.vgetmantph256 - return _mm256_getmant_round_ph(__A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_getmant_round_ph(__m256h __W, __mmask16 __U, __m256h __A) { -// CHECK-LABEL: @test_mm256_mask_getmant_round_ph -// CHECK: @llvm.x86.avx10.mask.vgetmantph256 - return _mm256_mask_getmant_round_ph(__W, __U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_getmant_round_ph(__mmask16 __U, __m256h __A) { -// CHECK-LABEL: @test_mm256_maskz_getmant_round_ph -// CHECK: @llvm.x86.avx10.mask.vgetmantph256 - return _mm256_maskz_getmant_round_ph(__U, __A, _MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_getmant_round_ps(__m256 __A) { -// CHECK-LABEL: @test_mm256_getmant_round_ps -// CHECK: @llvm.x86.avx10.mask.vgetmantps256 - return _mm256_getmant_round_ps(__A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_getmant_round_ps(__m256 __W, __mmask8 __U, __m256 __A) { -// CHECK-LABEL: @test_mm256_mask_getmant_round_ps -// CHECK: @llvm.x86.avx10.mask.vgetmantps256 - return _mm256_mask_getmant_round_ps(__W, __U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_getmant_round_ps(__mmask8 __U, __m256 __A) { -// CHECK-LABEL: @test_mm256_maskz_getmant_round_ps -// CHECK: @llvm.x86.avx10.mask.vgetmantps256 - return _mm256_maskz_getmant_round_ps(__U, __A,_MM_MANT_NORM_p5_2, _MM_MANT_SIGN_nan, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_max_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_max_round_pd -// CHECK: @llvm.x86.avx10.vmaxpd256(<4 x 
double> %{{.*}}, <4 x double> %{{.*}}, i32 8) - return _mm256_max_round_pd(__A, __B, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_max_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_max_round_pd -// CHECK: @llvm.x86.avx10.vmaxpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_max_round_pd(__W, __U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_max_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_max_round_pd -// CHECK: @llvm.x86.avx10.vmaxpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_max_round_pd(__U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_max_round_ph(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_max_round_ph -// CHECK: @llvm.x86.avx10.vmaxph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8) - return _mm256_max_round_ph(__A, __B, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_max_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_max_round_ph -// CHECK: @llvm.x86.avx10.vmaxph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_max_round_ph(__W, __U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_max_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_max_round_ph -// CHECK: @llvm.x86.avx10.vmaxph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_maskz_max_round_ph(__U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_max_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_max_round_ps -// CHECK: 
@llvm.x86.avx10.vmaxps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8) - return _mm256_max_round_ps(__A, __B, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_max_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_max_round_ps -// CHECK: @llvm.x86.avx10.vmaxps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_max_round_ps(__W, __U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_max_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_max_round_ps -// CHECK: @llvm.x86.avx10.vmaxps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_max_round_ps(__U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_min_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_min_round_pd -// CHECK: @llvm.x86.avx10.vminpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8) - return _mm256_min_round_pd(__A, __B, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_min_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_min_round_pd -// CHECK: @llvm.x86.avx10.vminpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_min_round_pd(__W, __U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_min_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_min_round_pd -// CHECK: @llvm.x86.avx10.vminpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 8) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_min_round_pd(__U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_min_round_ph(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_min_round_ph -// 
CHECK: @llvm.x86.avx10.vminph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8) - return _mm256_min_round_ph(__A, __B, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_min_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_min_round_ph -// CHECK: @llvm.x86.avx10.vminph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_min_round_ph(__W, __U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_min_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_min_round_ph -// CHECK: @llvm.x86.avx10.vminph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 8) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_maskz_min_round_ph(__U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_min_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_min_round_ps -// CHECK: @llvm.x86.avx10.vminps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8) - return _mm256_min_round_ps(__A, __B, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_min_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_min_round_ps -// CHECK: @llvm.x86.avx10.vminps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_min_round_ps(__W, __U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_min_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_min_round_ps -// CHECK: @llvm.x86.avx10.vminps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 8) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_min_round_ps(__U, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mul_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mul_round_pd -// 
CHECK: @llvm.x86.avx10.vmulpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) - return _mm256_mul_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_mul_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_mul_round_pd -// CHECK: @llvm.x86.avx10.vmulpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_mul_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_mul_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_mul_round_pd -// CHECK: @llvm.x86.avx10.vmulpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_mul_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mul_round_ph(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mul_round_ph -// CHECK: @llvm.x86.avx10.vmulph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11) - return _mm256_mul_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_mul_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_mul_round_ph -// CHECK: @llvm.x86.avx10.vmulph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_mul_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_mul_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_mul_round_ph -// CHECK: @llvm.x86.avx10.vmulph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_maskz_mul_round_ph(__U, __A, 
__B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mul_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mul_round_ps -// CHECK: @llvm.x86.avx10.vmulps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) - return _mm256_mul_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_mul_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_mul_round_ps -// CHECK: @llvm.x86.avx10.vmulps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_mul_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_mul_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_mul_round_ps -// CHECK: @llvm.x86.avx10.vmulps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_mul_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_range_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_range_round_pd -// CHECK: @llvm.x86.avx10.mask.vrangepd256 - return _mm256_range_round_pd(__A, __B, 4, 8); -} - -__m256d test_mm256_mask_range_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_range_round_pd -// CHECK: @llvm.x86.avx10.mask.vrangepd256 - return _mm256_mask_range_round_pd(__W, __U, __A, __B, 4, 8); -} - -__m256d test_mm256_maskz_range_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_range_round_pd -// CHECK: @llvm.x86.avx10.mask.vrangepd256 - return _mm256_maskz_range_round_pd(__U, __A, __B, 4, 8); -} - -__m256 test_mm256_range_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_range_round_ps -// CHECK: @llvm.x86.avx10.mask.vrangeps256 - return 
_mm256_range_round_ps(__A, __B, 4, 8); -} - -__m256 test_mm256_mask_range_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_range_round_ps -// CHECK: @llvm.x86.avx10.mask.vrangeps256 - return _mm256_mask_range_round_ps(__W, __U, __A, __B, 4, 8); -} - -__m256 test_mm256_maskz_range_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_range_round_ps -// CHECK: @llvm.x86.avx10.mask.vrangeps256 - return _mm256_maskz_range_round_ps(__U, __A, __B, 4, 8); -} - -__m256d test_mm256_reduce_round_pd(__m256d __A) { -// CHECK-LABEL: @test_mm256_reduce_round_pd -// CHECK: @llvm.x86.avx10.mask.vreducepd256 - return _mm256_reduce_round_pd(__A, 4, 8); -} - -__m256d test_mm256_mask_reduce_round_pd(__m256d __W, __mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_mask_reduce_round_pd -// CHECK: @llvm.x86.avx10.mask.vreducepd256 - return _mm256_mask_reduce_round_pd(__W, __U, __A, 4, 8); -} - -__m256d test_mm256_maskz_reduce_round_pd(__mmask8 __U, __m256d __A) { -// CHECK-LABEL: @test_mm256_maskz_reduce_round_pd -// CHECK: @llvm.x86.avx10.mask.vreducepd256 - return _mm256_maskz_reduce_round_pd(__U, __A, 4, 8); -} - -__m256h test_mm256_mask_reduce_round_ph(__m256h __A, __mmask8 __U, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_reduce_round_ph -// CHECK: @llvm.x86.avx10.mask.vreduceph256 - return _mm256_mask_reduce_round_ph(__A, __U, __C, 3, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_reduce_round_ph(__m256h __A, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_maskz_reduce_round_ph -// CHECK: @llvm.x86.avx10.mask.vreduceph256 - return _mm256_maskz_reduce_round_ph(__U, __A, 3, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_reduce_round_ph(__m256h __A) { -// CHECK-LABEL: @test_mm256_reduce_round_ph -// CHECK: @llvm.x86.avx10.mask.vreduceph256 - return _mm256_reduce_round_ph(__A, 3, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_reduce_round_ps(__m256 __A) { -// CHECK-LABEL: @test_mm256_reduce_round_ps -// 
CHECK: @llvm.x86.avx10.mask.vreduceps256 - return _mm256_reduce_round_ps(__A, 4, 8); -} - -__m256 test_mm256_mask_reduce_round_ps(__m256 __W, __mmask8 __U, __m256 __A) { -// CHECK-LABEL: @test_mm256_mask_reduce_round_ps -// CHECK: @llvm.x86.avx10.mask.vreduceps256 - return _mm256_mask_reduce_round_ps(__W, __U, __A, 4, 8); -} - -__m256 test_mm256_maskz_reduce_round_ps(__mmask8 __U, __m256 __A) { -// CHECK-LABEL: @test_mm256_maskz_reduce_round_ps -// CHECK: @llvm.x86.avx10.mask.vreduceps256 - return _mm256_maskz_reduce_round_ps(__U, __A, 4, 8); -} - -__m256d test_mm256_roundscale_round_pd(__m256d __A) -{ -// CHECK-LABEL: @test_mm256_roundscale_round_pd -// CHECK: @llvm.x86.avx10.mask.vrndscalepd256 - return _mm256_roundscale_round_pd(__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_roundscale_round_pd(__m256d __A,__mmask8 __U,__m256d __C) -{ -// CHECK-LABEL: @test_mm256_mask_roundscale_round_pd -// CHECK: @llvm.x86.avx10.mask.vrndscalepd256 - return _mm256_mask_roundscale_round_pd(__A,__U,__C,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_roundscale_round_pd(__m256d __A,__mmask8 __U) -{ -// CHECK-LABEL: @test_mm256_maskz_roundscale_round_pd -// CHECK: @llvm.x86.avx10.mask.vrndscalepd256 - return _mm256_maskz_roundscale_round_pd(__U,__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_roundscale_round_ph(__m256h __A, __mmask8 __U, __m256h __C) { -// CHECK-LABEL: @test_mm256_mask_roundscale_round_ph -// CHECK: @llvm.x86.avx10.mask.vrndscaleph256 - return _mm256_mask_roundscale_round_ph(__A, __U, __C, 3, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_roundscale_round_ph(__m256h __A, __mmask8 __U) { -// CHECK-LABEL: @test_mm256_maskz_roundscale_round_ph -// CHECK: @llvm.x86.avx10.mask.vrndscaleph256 - return _mm256_maskz_roundscale_round_ph(__U, __A, 3, _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_roundscale_round_ph(__m256h __A) { -// CHECK-LABEL: @test_mm256_roundscale_round_ph -// CHECK: 
@llvm.x86.avx10.mask.vrndscaleph256 - return _mm256_roundscale_round_ph(__A, 3, _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_roundscale_round_ps(__m256 __A) -{ -// CHECK-LABEL: @test_mm256_roundscale_round_ps -// CHECK: @llvm.x86.avx10.mask.vrndscaleps256 - return _mm256_roundscale_round_ps(__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_roundscale_round_ps(__m256 __A,__mmask8 __U,__m256 __C) -{ -// CHECK-LABEL: @test_mm256_mask_roundscale_round_ps -// CHECK: @llvm.x86.avx10.mask.vrndscaleps256 - return _mm256_mask_roundscale_round_ps(__A,__U,__C,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_roundscale_round_ps(__m256 __A,__mmask8 __U) -{ -// CHECK-LABEL: @test_mm256_maskz_roundscale_round_ps -// CHECK: @llvm.x86.avx10.mask.vrndscaleps256 - return _mm256_maskz_roundscale_round_ps(__U,__A,_MM_FROUND_TO_ZERO,_MM_FROUND_NO_EXC); -} - -__m256d test_mm256_scalef_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_scalef_round_pd -// CHECK: @llvm.x86.avx10.mask.vscalefpd256 - return _mm256_scalef_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_scalef_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_scalef_round_pd -// CHECK: @llvm.x86.avx10.mask.vscalefpd256 - return _mm256_mask_scalef_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_scalef_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_scalef_round_pd -// CHECK: @llvm.x86.avx10.mask.vscalefpd256 - return _mm256_maskz_scalef_round_pd(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_scalef_round_ph(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_scalef_round_ph -// CHECK: @llvm.x86.avx10.mask.vscalefph256 - return _mm256_scalef_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_scalef_round_ph(__m256h __W, 
__mmask16 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_scalef_round_ph -// CHECK: @llvm.x86.avx10.mask.vscalefph256 - return _mm256_mask_scalef_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_scalef_round_ph(__mmask16 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_scalef_round_ph -// CHECK: @llvm.x86.avx10.mask.vscalefph256 - return _mm256_maskz_scalef_round_ph(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_scalef_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_scalef_round_ps -// CHECK: @llvm.x86.avx10.mask.vscalefps256 - return _mm256_scalef_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_scalef_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_scalef_round_ps -// CHECK: @llvm.x86.avx10.mask.vscalefps256 - return _mm256_mask_scalef_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_scalef_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_scalef_round_ps -// CHECK: @llvm.x86.avx10.mask.vscalefps256 - return _mm256_maskz_scalef_round_ps(__U, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_sqrt_round_pd(__m256d __A) -{ -// CHECK-LABEL: @test_mm256_sqrt_round_pd -// CHECK: call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %{{.*}}, i32 11) - return _mm256_sqrt_round_pd(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_sqrt_round_pd(__m256d __W,__mmask8 __U,__m256d __A) -{ -// CHECK-LABEL: @test_mm256_mask_sqrt_round_pd -// CHECK: call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %{{.*}}, i32 11) -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_sqrt_round_pd(__W,__U,__A,_MM_FROUND_TO_ZERO | 
_MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_sqrt_round_pd(__mmask8 __U,__m256d __A) -{ -// CHECK-LABEL: @test_mm256_maskz_sqrt_round_pd -// CHECK: call <4 x double> @llvm.x86.avx10.vsqrtpd256(<4 x double> %{{.*}}, i32 11) -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> {{.*}} - return _mm256_maskz_sqrt_round_pd(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_sqrt_round_ph(__m256h __A) { -// CHECK-LABEL: @test_mm256_sqrt_round_ph -// CHECK: call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %{{.*}}, i32 11) - return _mm256_sqrt_round_ph(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_mask_sqrt_round_ph(__m256h __W, __mmask16 __U, __m256h __A) { -// CHECK-LABEL: @test_mm256_mask_sqrt_round_ph -// CHECK: call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %{{.*}}, i32 11) -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_sqrt_round_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_sqrt_round_ph(__mmask16 __U, __m256h __A) { -// CHECK-LABEL: @test_mm256_maskz_sqrt_round_ph -// CHECK: call <16 x half> @llvm.x86.avx10.vsqrtph256(<16 x half> %{{.*}}, i32 11) -// CHECK: bitcast i16 %{{.*}} to <16 x i1> -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> {{.*}} - return _mm256_maskz_sqrt_round_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_sqrt_round_ps(__m256 __A) -{ -// CHECK-LABEL: @test_mm256_sqrt_round_ps -// CHECK: call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %{{.*}}, i32 11) - return _mm256_sqrt_round_ps(__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_sqrt_round_ps(__m256 __W,__mmask8 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_mask_sqrt_round_ps -// CHECK: call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %{{.*}}, 
i32 11) -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_sqrt_round_ps(__W,__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_sqrt_round_ps(__mmask8 __U,__m256 __A) -{ -// CHECK-LABEL: @test_mm256_maskz_sqrt_round_ps -// CHECK: call <8 x float> @llvm.x86.avx10.vsqrtps256(<8 x float> %{{.*}}, i32 11) -// CHECK: bitcast i8 %{{.*}} to <8 x i1> -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}} - return _mm256_maskz_sqrt_round_ps(__U,__A,_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_sub_round_pd(__m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_sub_round_pd -// CHECK: @llvm.x86.avx10.vsubpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11) - return _mm256_sub_round_pd(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_mask_sub_round_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_mask_sub_round_pd -// CHECK: @llvm.x86.avx10.vsubpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 10) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_mask_sub_round_pd(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256d test_mm256_maskz_sub_round_pd(__mmask8 __U, __m256d __A, __m256d __B) { -// CHECK-LABEL: @test_mm256_maskz_sub_round_pd -// CHECK: @llvm.x86.avx10.vsubpd256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 9) -// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} - return _mm256_maskz_sub_round_pd(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_sub_round_ph(__m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_sub_round_ph -// CHECK: @llvm.x86.avx10.vsubph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 11) - return _mm256_sub_round_ph(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256h 
test_mm256_mask_sub_round_ph(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_mask_sub_round_ph -// CHECK: @llvm.x86.avx10.vsubph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 10) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_mask_sub_round_ph(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256h test_mm256_maskz_sub_round_ph(__mmask8 __U, __m256h __A, __m256h __B) { -// CHECK-LABEL: @test_mm256_maskz_sub_round_ph -// CHECK: @llvm.x86.avx10.vsubph256(<16 x half> %{{.*}}, <16 x half> %{{.*}}, i32 9) -// CHECK: select <16 x i1> %{{.*}}, <16 x half> %{{.*}}, <16 x half> %{{.*}} - return _mm256_maskz_sub_round_ph(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_sub_round_ps(__m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_sub_round_ps -// CHECK: @llvm.x86.avx10.vsubps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11) - return _mm256_sub_round_ps(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_mask_sub_round_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_mask_sub_round_ps -// CHECK: @llvm.x86.avx10.vsubps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 10) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_mask_sub_round_ps(__W, __U, __A, __B, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); -} - -__m256 test_mm256_maskz_sub_round_ps(__mmask8 __U, __m256 __A, __m256 __B) { -// CHECK-LABEL: @test_mm256_maskz_sub_round_ps -// CHECK: @llvm.x86.avx10.vsubps256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 9) -// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - return _mm256_maskz_sub_round_ps(__U, __A, __B, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); -} diff --git a/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c b/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c index 7c5fc087b9da4..7f30befefffe9 
100644 --- a/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2satcvt-builtins.c @@ -5,599 +5,457 @@ #include -__m128i test_mm_ipcvtbf16_epi8(__m128bh __A) { - // CHECK-LABEL: @test_mm_ipcvtbf16_epi8( +__m128i test_mm_ipcvts_bf16_epi8(__m128bh __A) { + // CHECK-LABEL: @test_mm_ipcvts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvtbf162ibs128 - return _mm_ipcvtbf16_epi8(__A); + return _mm_ipcvts_bf16_epi8(__A); } -__m128i test_mm_mask_ipcvtbf16_epi8(__m128i __S, __mmask8 __A, __m128bh __B) { - // CHECK-LABEL: @test_mm_mask_ipcvtbf16_epi8( +__m128i test_mm_mask_ipcvts_bf16_epi8(__m128i __S, __mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_mask_ipcvts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvtbf162ibs128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_mask_ipcvtbf16_epi8(__S, __A, __B); + return _mm_mask_ipcvts_bf16_epi8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvtbf16_epi8(__mmask8 __A, __m128bh __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvtbf16_epi8( +__m128i test_mm_maskz_ipcvts_bf16_epi8(__mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvtbf162ibs128 // CHECK: zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_maskz_ipcvtbf16_epi8(__A, __B); + return _mm_maskz_ipcvts_bf16_epi8(__A, __B); } -__m256i test_mm256_ipcvtbf16_epi8(__m256bh __A) { - // CHECK-LABEL: @test_mm256_ipcvtbf16_epi8( +__m256i test_mm256_ipcvts_bf16_epi8(__m256bh __A) { + // CHECK-LABEL: @test_mm256_ipcvts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvtbf162ibs256 - return _mm256_ipcvtbf16_epi8(__A); + return _mm256_ipcvts_bf16_epi8(__A); } -__m256i test_mm256_mask_ipcvtbf16_epi8(__m256i __S, __mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtbf16_epi8( +__m256i test_mm256_mask_ipcvts_bf16_epi8(__m256i __S, __mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvts_bf16_epi8( // CHECK: 
@llvm.x86.avx10.vcvtbf162ibs256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_mask_ipcvtbf16_epi8(__S, __A, __B); + return _mm256_mask_ipcvts_bf16_epi8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvtbf16_epi8(__mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtbf16_epi8( +__m256i test_mm256_maskz_ipcvts_bf16_epi8(__mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvtbf162ibs256 // CHECK: zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_maskz_ipcvtbf16_epi8(__A, __B); + return _mm256_maskz_ipcvts_bf16_epi8(__A, __B); } -__m128i test_mm_ipcvtbf16_epu8(__m128bh __A) { - // CHECK-LABEL: @test_mm_ipcvtbf16_epu8( +__m128i test_mm_ipcvts_bf16_epu8(__m128bh __A) { + // CHECK-LABEL: @test_mm_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs128 - return _mm_ipcvtbf16_epu8(__A); + return _mm_ipcvts_bf16_epu8(__A); } -__m128i test_mm_mask_ipcvtbf16_epu8(__m128i __S, __mmask8 __A, __m128bh __B) { - // CHECK-LABEL: @test_mm_mask_ipcvtbf16_epu8( +__m128i test_mm_mask_ipcvts_bf16_epu8(__m128i __S, __mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_mask_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_mask_ipcvtbf16_epu8(__S, __A, __B); + return _mm_mask_ipcvts_bf16_epu8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvtbf16_epu8(__mmask8 __A, __m128bh __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvtbf16_epu8( +__m128i test_mm_maskz_ipcvts_bf16_epu8(__mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_maskz_ipcvtbf16_epu8(__A, __B); + return _mm_maskz_ipcvts_bf16_epu8(__A, __B); } -__m256i test_mm256_ipcvtbf16_epu8(__m256bh __A) { - // 
CHECK-LABEL: @test_mm256_ipcvtbf16_epu8( +__m256i test_mm256_ipcvts_bf16_epu8(__m256bh __A) { + // CHECK-LABEL: @test_mm256_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs256 - return _mm256_ipcvtbf16_epu8(__A); + return _mm256_ipcvts_bf16_epu8(__A); } -__m256i test_mm256_mask_ipcvtbf16_epu8(__m256i __S, __mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtbf16_epu8( +__m256i test_mm256_mask_ipcvts_bf16_epu8(__m256i __S, __mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_mask_ipcvtbf16_epu8(__S, __A, __B); + return _mm256_mask_ipcvts_bf16_epu8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvtbf16_epu8(__mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtbf16_epu8( +__m256i test_mm256_maskz_ipcvts_bf16_epu8(__mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvtbf162iubs256 // CHECK: zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_maskz_ipcvtbf16_epu8(__A, __B); + return _mm256_maskz_ipcvts_bf16_epu8(__A, __B); } -__m128i test_mm_ipcvtph_epi8(__m128h __A) { - // CHECK-LABEL: @test_mm_ipcvtph_epi8( +__m128i test_mm_ipcvts_ph_epi8(__m128h __A) { + // CHECK-LABEL: @test_mm_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs128 - return _mm_ipcvtph_epi8(__A); + return _mm_ipcvts_ph_epi8(__A); } -__m128i test_mm_mask_ipcvtph_epi8(__m128i __S, __mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_ipcvtph_epi8( +__m128i test_mm_mask_ipcvts_ph_epi8(__m128i __S, __mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs128 - return _mm_mask_ipcvtph_epi8(__S, __A, __B); + return _mm_mask_ipcvts_ph_epi8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvtph_epi8(__mmask8 __A, __m128h 
__B) { - // CHECK-LABEL: @test_mm_maskz_ipcvtph_epi8( +__m128i test_mm_maskz_ipcvts_ph_epi8(__mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs128 - return _mm_maskz_ipcvtph_epi8(__A, __B); + return _mm_maskz_ipcvts_ph_epi8(__A, __B); } -__m256i test_mm256_ipcvtph_epi8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvtph_epi8( +__m256i test_mm256_ipcvts_ph_epi8(__m256h __A) { + // CHECK-LABEL: @test_mm256_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs256 - return _mm256_ipcvtph_epi8(__A); + return _mm256_ipcvts_ph_epi8(__A); } -__m256i test_mm256_mask_ipcvtph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtph_epi8( +__m256i test_mm256_mask_ipcvts_ph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs256 - return _mm256_mask_ipcvtph_epi8(__S, __A, __B); + return _mm256_mask_ipcvts_ph_epi8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvtph_epi8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtph_epi8( +__m256i test_mm256_maskz_ipcvts_ph_epi8(__mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs256 - return _mm256_maskz_ipcvtph_epi8(__A, __B); + return _mm256_maskz_ipcvts_ph_epi8(__A, __B); } -__m256i test_mm256_ipcvt_roundph_epi8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvt_roundph_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs256 - return _mm256_ipcvt_roundph_epi8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_ipcvt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvt_roundph_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs256 - return _mm256_mask_ipcvt_roundph_epi8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} -__m256i test_mm256_maskz_ipcvt_roundph_epi8(__mmask16 __A, 
__m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvt_roundph_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvtph2ibs256 - return _mm256_maskz_ipcvt_roundph_epi8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm_ipcvtph_epu8(__m128h __A) { - // CHECK-LABEL: @test_mm_ipcvtph_epu8( +__m128i test_mm_ipcvts_ph_epu8(__m128h __A) { + // CHECK-LABEL: @test_mm_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs128 - return _mm_ipcvtph_epu8(__A); + return _mm_ipcvts_ph_epu8(__A); } -__m128i test_mm_mask_ipcvtph_epu8(__m128i __S, __mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_ipcvtph_epu8( +__m128i test_mm_mask_ipcvts_ph_epu8(__m128i __S, __mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs128 - return _mm_mask_ipcvtph_epu8(__S, __A, __B); + return _mm_mask_ipcvts_ph_epu8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvtph_epu8(__mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvtph_epu8( +__m128i test_mm_maskz_ipcvts_ph_epu8(__mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs128 - return _mm_maskz_ipcvtph_epu8(__A, __B); -} - -__m256i test_mm256_ipcvtph_epu8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvtph_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs256 - return _mm256_ipcvtph_epu8(__A); + return _mm_maskz_ipcvts_ph_epu8(__A, __B); } -__m256i test_mm256_mask_ipcvtph_epu8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtph_epu8( +__m256i test_mm256_ipcvts_ph_epu8(__m256h __A) { + // CHECK-LABEL: @test_mm256_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs256 - return _mm256_mask_ipcvtph_epu8(__S, __A, __B); + return _mm256_ipcvts_ph_epu8(__A); } -__m256i test_mm256_maskz_ipcvtph_epu8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtph_epu8( +__m256i test_mm256_mask_ipcvts_ph_epu8(__m256i __S, __mmask16 __A, __m256h 
__B) { + // CHECK-LABEL: @test_mm256_mask_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs256 - return _mm256_maskz_ipcvtph_epu8(__A, __B); + return _mm256_mask_ipcvts_ph_epu8(__S, __A, __B); } -__m256i test_mm256_ipcvt_roundph_epu8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvt_roundph_epu8( +__m256i test_mm256_maskz_ipcvts_ph_epu8(__mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs256 - return _mm256_ipcvt_roundph_epu8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + return _mm256_maskz_ipcvts_ph_epu8(__A, __B); } -__m256i test_mm256_mask_ipcvt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvt_roundph_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs256 - return _mm256_mask_ipcvt_roundph_epu8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} -__m256i test_mm256_maskz_ipcvt_roundph_epu8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvt_roundph_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvtph2iubs256 - return _mm256_maskz_ipcvt_roundph_epu8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm_ipcvtps_epi8(__m128 __A) { - // CHECK-LABEL: @test_mm_ipcvtps_epi8( +__m128i test_mm_ipcvts_ps_epi8(__m128 __A) { + // CHECK-LABEL: @test_mm_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs128 - return _mm_ipcvtps_epi8(__A); + return _mm_ipcvts_ps_epi8(__A); } -__m128i test_mm_mask_ipcvtps_epi8(__m128i __S, __mmask8 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_ipcvtps_epi8( +__m128i test_mm_mask_ipcvts_ps_epi8(__m128i __S, __mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs128 - return _mm_mask_ipcvtps_epi8(__S, __A, __B); + return _mm_mask_ipcvts_ps_epi8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvtps_epi8(__mmask8 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvtps_epi8( +__m128i 
test_mm_maskz_ipcvts_ps_epi8(__mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs128 - return _mm_maskz_ipcvtps_epi8(__A, __B); + return _mm_maskz_ipcvts_ps_epi8(__A, __B); } -__m256i test_mm256_ipcvtps_epi8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvtps_epi8( +__m256i test_mm256_ipcvts_ps_epi8(__m256 __A) { + // CHECK-LABEL: @test_mm256_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs256 - return _mm256_ipcvtps_epi8(__A); + return _mm256_ipcvts_ps_epi8(__A); } -__m256i test_mm256_mask_ipcvtps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtps_epi8( +__m256i test_mm256_mask_ipcvts_ps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs256 - return _mm256_mask_ipcvtps_epi8(__S, __A, __B); + return _mm256_mask_ipcvts_ps_epi8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvtps_epi8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtps_epi8( +__m256i test_mm256_maskz_ipcvts_ps_epi8(__mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs256 - return _mm256_maskz_ipcvtps_epi8(__A, __B); + return _mm256_maskz_ipcvts_ps_epi8(__A, __B); } -__m256i test_mm256_ipcvt_roundps_epi8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvt_roundps_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs256 - return _mm256_ipcvt_roundps_epi8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_ipcvt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvt_roundps_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvtps2ibs256 - return _mm256_mask_ipcvt_roundps_epi8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_ipcvt_roundps_epi8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvt_roundps_epi8( - // CHECK: 
@llvm.x86.avx10.mask.vcvtps2ibs256 - return _mm256_maskz_ipcvt_roundps_epi8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm_ipcvtps_epu8(__m128 __A) { - // CHECK-LABEL: @test_mm_ipcvtps_epu8( +__m128i test_mm_ipcvts_ps_epu8(__m128 __A) { + // CHECK-LABEL: @test_mm_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs128 - return _mm_ipcvtps_epu8(__A); + return _mm_ipcvts_ps_epu8(__A); } -__m128i test_mm_mask_ipcvtps_epu8(__m128i __S, __mmask8 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_ipcvtps_epu8( +__m128i test_mm_mask_ipcvts_ps_epu8(__m128i __S, __mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs128 - return _mm_mask_ipcvtps_epu8(__S, __A, __B); + return _mm_mask_ipcvts_ps_epu8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvtps_epu8(__mmask8 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvtps_epu8( +__m128i test_mm_maskz_ipcvts_ps_epu8(__mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs128 - return _mm_maskz_ipcvtps_epu8(__A, __B); + return _mm_maskz_ipcvts_ps_epu8(__A, __B); } -__m256i test_mm256_ipcvtps_epu8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvtps_epu8( +__m256i test_mm256_ipcvts_ps_epu8(__m256 __A) { + // CHECK-LABEL: @test_mm256_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs256 - return _mm256_ipcvtps_epu8(__A); + return _mm256_ipcvts_ps_epu8(__A); } -__m256i test_mm256_mask_ipcvtps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtps_epu8( +__m256i test_mm256_mask_ipcvts_ps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs256 - return _mm256_mask_ipcvtps_epu8(__S, __A, __B); + return _mm256_mask_ipcvts_ps_epu8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvtps_epu8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: 
@test_mm256_maskz_ipcvtps_epu8( +__m256i test_mm256_maskz_ipcvts_ps_epu8(__mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs256 - return _mm256_maskz_ipcvtps_epu8(__A, __B); + return _mm256_maskz_ipcvts_ps_epu8(__A, __B); } -__m256i test_mm256_ipcvt_roundps_epu8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvt_roundps_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs256 - return _mm256_ipcvt_roundps_epu8(__A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_ipcvt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvt_roundps_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs256 - return _mm256_mask_ipcvt_roundps_epu8(__S, __A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_ipcvt_roundps_epu8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvt_roundps_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvtps2iubs256 - return _mm256_maskz_ipcvt_roundps_epu8(__A, __B, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -} - -__m128i test_mm_ipcvttbf16_epi8(__m128bh __A) { - // CHECK-LABEL: @test_mm_ipcvttbf16_epi8( +__m128i test_mm_ipcvtts_bf16_epi8(__m128bh __A) { + // CHECK-LABEL: @test_mm_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs128 - return _mm_ipcvttbf16_epi8(__A); + return _mm_ipcvtts_bf16_epi8(__A); } -__m128i test_mm_mask_ipcvttbf16_epi8(__m128i __S, __mmask8 __A, __m128bh __B) { - // CHECK-LABEL: @test_mm_mask_ipcvttbf16_epi8( +__m128i test_mm_mask_ipcvtts_bf16_epi8(__m128i __S, __mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_mask_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_mask_ipcvttbf16_epi8(__S, __A, __B); + return _mm_mask_ipcvtts_bf16_epi8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvttbf16_epi8(__mmask8 __A, __m128bh __B) { - // CHECK-LABEL: 
@test_mm_maskz_ipcvttbf16_epi8( +__m128i test_mm_maskz_ipcvtts_bf16_epi8(__mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs128 // CHECK: zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_maskz_ipcvttbf16_epi8(__A, __B); + return _mm_maskz_ipcvtts_bf16_epi8(__A, __B); } -__m256i test_mm256_ipcvttbf16_epi8(__m256bh __A) { - // CHECK-LABEL: @test_mm256_ipcvttbf16_epi8( +__m256i test_mm256_ipcvtts_bf16_epi8(__m256bh __A) { + // CHECK-LABEL: @test_mm256_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs256 - return _mm256_ipcvttbf16_epi8(__A); + return _mm256_ipcvtts_bf16_epi8(__A); } -__m256i test_mm256_mask_ipcvttbf16_epi8(__m256i __S, __mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvttbf16_epi8( +__m256i test_mm256_mask_ipcvtts_bf16_epi8(__m256i __S, __mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_mask_ipcvttbf16_epi8(__S, __A, __B); + return _mm256_mask_ipcvtts_bf16_epi8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvttbf16_epi8(__mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvttbf16_epi8( +__m256i test_mm256_maskz_ipcvtts_bf16_epi8(__mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvtts_bf16_epi8( // CHECK: @llvm.x86.avx10.vcvttbf162ibs256 // CHECK: zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_maskz_ipcvttbf16_epi8(__A, __B); + return _mm256_maskz_ipcvtts_bf16_epi8(__A, __B); } -__m128i test_mm_ipcvttbf16_epu8(__m128bh __A) { - // CHECK-LABEL: @test_mm_ipcvttbf16_epu8( +__m128i test_mm_ipcvtts_bf16_epu8(__m128bh __A) { + // CHECK-LABEL: @test_mm_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs128 - return _mm_ipcvttbf16_epu8(__A); + 
return _mm_ipcvtts_bf16_epu8(__A); } -__m128i test_mm_mask_ipcvttbf16_epu8(__m128i __S, __mmask8 __A, __m128bh __B) { - // CHECK-LABEL: @test_mm_mask_ipcvttbf16_epu8( +__m128i test_mm_mask_ipcvtts_bf16_epu8(__m128i __S, __mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_mask_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs128 // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_mask_ipcvttbf16_epu8(__S, __A, __B); + return _mm_mask_ipcvtts_bf16_epu8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvttbf16_epu8(__mmask8 __A, __m128bh __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvttbf16_epu8( +__m128i test_mm_maskz_ipcvtts_bf16_epu8(__mmask8 __A, __m128bh __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs128 // CHECK: zeroinitializer // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} - return _mm_maskz_ipcvttbf16_epu8(__A, __B); + return _mm_maskz_ipcvtts_bf16_epu8(__A, __B); } -__m256i test_mm256_ipcvttbf16_epu8(__m256bh __A) { - // CHECK-LABEL: @test_mm256_ipcvttbf16_epu8( +__m256i test_mm256_ipcvtts_bf16_epu8(__m256bh __A) { + // CHECK-LABEL: @test_mm256_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs256 - return _mm256_ipcvttbf16_epu8(__A); + return _mm256_ipcvtts_bf16_epu8(__A); } -__m256i test_mm256_mask_ipcvttbf16_epu8(__m256i __S, __mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvttbf16_epu8( +__m256i test_mm256_mask_ipcvtts_bf16_epu8(__m256i __S, __mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs256 // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_mask_ipcvttbf16_epu8(__S, __A, __B); + return _mm256_mask_ipcvtts_bf16_epu8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvttbf16_epu8(__mmask16 __A, __m256bh __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvttbf16_epu8( +__m256i 
test_mm256_maskz_ipcvtts_bf16_epu8(__mmask16 __A, __m256bh __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvtts_bf16_epu8( // CHECK: @llvm.x86.avx10.vcvttbf162iubs256 // CHECK: zeroinitializer // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} - return _mm256_maskz_ipcvttbf16_epu8(__A, __B); + return _mm256_maskz_ipcvtts_bf16_epu8(__A, __B); } -__m128i test_mm_ipcvttph_epi8(__m128h __A) { - // CHECK-LABEL: @test_mm_ipcvttph_epi8( +__m128i test_mm_ipcvtts_ph_epi8(__m128h __A) { + // CHECK-LABEL: @test_mm_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs128 - return _mm_ipcvttph_epi8(__A); + return _mm_ipcvtts_ph_epi8(__A); } -__m128i test_mm_mask_ipcvttph_epi8(__m128i __S, __mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_ipcvttph_epi8( +__m128i test_mm_mask_ipcvtts_ph_epi8(__m128i __S, __mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs128 - return _mm_mask_ipcvttph_epi8(__S, __A, __B); + return _mm_mask_ipcvtts_ph_epi8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvttph_epi8(__mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvttph_epi8( +__m128i test_mm_maskz_ipcvtts_ph_epi8(__mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs128 - return _mm_maskz_ipcvttph_epi8(__A, __B); -} - -__m256i test_mm256_ipcvttph_epi8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvttph_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs256 - return _mm256_ipcvttph_epi8(__A); -} - -__m256i test_mm256_mask_ipcvttph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvttph_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs256 - return _mm256_mask_ipcvttph_epi8(__S, __A, __B); + return _mm_maskz_ipcvtts_ph_epi8(__A, __B); } -__m256i test_mm256_maskz_ipcvttph_epi8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvttph_epi8( +__m256i 
test_mm256_ipcvtts_ph_epi8(__m256h __A) { + // CHECK-LABEL: @test_mm256_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs256 - return _mm256_maskz_ipcvttph_epi8(__A, __B); + return _mm256_ipcvtts_ph_epi8(__A); } -__m256i test_mm256_ipcvtt_roundph_epi8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvtt_roundph_epi8( +__m256i test_mm256_mask_ipcvtts_ph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs256 - return _mm256_ipcvtt_roundph_epi8(__A, _MM_FROUND_NO_EXC); + return _mm256_mask_ipcvtts_ph_epi8(__S, __A, __B); } -__m256i test_mm256_mask_ipcvtt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtt_roundph_epi8( +__m256i test_mm256_maskz_ipcvtts_ph_epi8(__mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvtts_ph_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs256 - return _mm256_mask_ipcvtt_roundph_epi8(__S, __A, __B, _MM_FROUND_NO_EXC); + return _mm256_maskz_ipcvtts_ph_epi8(__A, __B); } -__m256i test_mm256_maskz_ipcvtt_roundph_epi8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtt_roundph_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvttph2ibs256 - return _mm256_maskz_ipcvtt_roundph_epi8(__A, __B, _MM_FROUND_NO_EXC); -} - -__m128i test_mm_ipcvttph_epu8(__m128h __A) { - // CHECK-LABEL: @test_mm_ipcvttph_epu8( +__m128i test_mm_ipcvtts_ph_epu8(__m128h __A) { + // CHECK-LABEL: @test_mm_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs128 - return _mm_ipcvttph_epu8(__A); + return _mm_ipcvtts_ph_epu8(__A); } -__m128i test_mm_mask_ipcvttph_epu8(__m128i __S, __mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_mask_ipcvttph_epu8( +__m128i test_mm_mask_ipcvtts_ph_epu8(__m128i __S, __mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_mask_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs128 - return _mm_mask_ipcvttph_epu8(__S, __A, __B); + return 
_mm_mask_ipcvtts_ph_epu8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvttph_epu8(__mmask8 __A, __m128h __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvttph_epu8( +__m128i test_mm_maskz_ipcvtts_ph_epu8(__mmask8 __A, __m128h __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs128 - return _mm_maskz_ipcvttph_epu8(__A, __B); -} - -__m256i test_mm256_ipcvttph_epu8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvttph_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs256 - return _mm256_ipcvttph_epu8(__A); -} - -__m256i test_mm256_mask_ipcvttph_epu8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvttph_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs256 - return _mm256_mask_ipcvttph_epu8(__S, __A, __B); + return _mm_maskz_ipcvtts_ph_epu8(__A, __B); } -__m256i test_mm256_maskz_ipcvttph_epu8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvttph_epu8( +__m256i test_mm256_ipcvtts_ph_epu8(__m256h __A) { + // CHECK-LABEL: @test_mm256_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs256 - return _mm256_maskz_ipcvttph_epu8(__A, __B); + return _mm256_ipcvtts_ph_epu8(__A); } -__m256i test_mm256_ipcvtt_roundph_epu8(__m256h __A) { - // CHECK-LABEL: @test_mm256_ipcvtt_roundph_epu8( +__m256i test_mm256_mask_ipcvtts_ph_epu8(__m256i __S, __mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs256 - return _mm256_ipcvtt_roundph_epu8(__A, _MM_FROUND_NO_EXC); + return _mm256_mask_ipcvtts_ph_epu8(__S, __A, __B); } -__m256i test_mm256_mask_ipcvtt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtt_roundph_epu8( +__m256i test_mm256_maskz_ipcvtts_ph_epu8(__mmask16 __A, __m256h __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvtts_ph_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs256 - return _mm256_mask_ipcvtt_roundph_epu8(__S, __A, __B, _MM_FROUND_NO_EXC); + return 
_mm256_maskz_ipcvtts_ph_epu8(__A, __B); } -__m256i test_mm256_maskz_ipcvtt_roundph_epu8(__mmask16 __A, __m256h __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtt_roundph_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvttph2iubs256 - return _mm256_maskz_ipcvtt_roundph_epu8(__A, __B, _MM_FROUND_NO_EXC); -} - -__m128i test_mm_ipcvttps_epi8(__m128 __A) { - // CHECK-LABEL: @test_mm_ipcvttps_epi8( +__m128i test_mm_ipcvtts_ps_epi8(__m128 __A) { + // CHECK-LABEL: @test_mm_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs128 - return _mm_ipcvttps_epi8(__A); + return _mm_ipcvtts_ps_epi8(__A); } -__m128i test_mm_mask_ipcvttps_epi8(__m128i __S, __mmask8 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_ipcvttps_epi8( +__m128i test_mm_mask_ipcvtts_ps_epi8(__m128i __S, __mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs128 - return _mm_mask_ipcvttps_epi8(__S, __A, __B); + return _mm_mask_ipcvtts_ps_epi8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvttps_epi8(__mmask8 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_ipcvttps_epi8( +__m128i test_mm_maskz_ipcvtts_ps_epi8(__mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs128 - return _mm_maskz_ipcvttps_epi8(__A, __B); + return _mm_maskz_ipcvtts_ps_epi8(__A, __B); } -__m256i test_mm256_ipcvttps_epi8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvttps_epi8( +__m256i test_mm256_ipcvtts_ps_epi8(__m256 __A) { + // CHECK-LABEL: @test_mm256_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs256 - return _mm256_ipcvttps_epi8(__A); + return _mm256_ipcvtts_ps_epi8(__A); } -__m256i test_mm256_mask_ipcvttps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvttps_epi8( +__m256i test_mm256_mask_ipcvtts_ps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs256 - return 
_mm256_mask_ipcvttps_epi8(__S, __A, __B); + return _mm256_mask_ipcvtts_ps_epi8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvttps_epi8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvttps_epi8( +__m256i test_mm256_maskz_ipcvtts_ps_epi8(__mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvtts_ps_epi8( // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs256 - return _mm256_maskz_ipcvttps_epi8(__A, __B); + return _mm256_maskz_ipcvtts_ps_epi8(__A, __B); } -__m256i test_mm256_ipcvtt_roundps_epi8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvtt_roundps_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs256 - return _mm256_ipcvtt_roundps_epi8(__A, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_mask_ipcvtt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtt_roundps_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs256 - return _mm256_mask_ipcvtt_roundps_epi8(__S, __A, __B, _MM_FROUND_NO_EXC); -} - -__m256i test_mm256_maskz_ipcvtt_roundps_epi8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtt_roundps_epi8( - // CHECK: @llvm.x86.avx10.mask.vcvttps2ibs256 - return _mm256_maskz_ipcvtt_roundps_epi8(__A, __B, _MM_FROUND_NO_EXC); -} - -__m128i test_mm_ipcvttps_epu8(__m128 __A) { - // CHECK-LABEL: @test_mm_ipcvttps_epu8( +__m128i test_mm_ipcvtts_ps_epu8(__m128 __A) { + // CHECK-LABEL: @test_mm_ipcvtts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs128 - return _mm_ipcvttps_epu8(__A); + return _mm_ipcvtts_ps_epu8(__A); } -__m128i test_mm_mask_ipcvttps_epu8(__m128i __S, __mmask8 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_ipcvttps_epu8( +__m128i test_mm_mask_ipcvtts_ps_epu8(__m128i __S, __mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_ipcvtts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs128 - return _mm_mask_ipcvttps_epu8(__S, __A, __B); + return _mm_mask_ipcvtts_ps_epu8(__S, __A, __B); } -__m128i test_mm_maskz_ipcvttps_epu8(__mmask8 __A, __m128 __B) { - // 
CHECK-LABEL: @test_mm_maskz_ipcvttps_epu8( +__m128i test_mm_maskz_ipcvtts_ps_epu8(__mmask8 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_ipcvtts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs128 - return _mm_maskz_ipcvttps_epu8(__A, __B); -} - -__m256i test_mm256_ipcvttps_epu8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvttps_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs256 - return _mm256_ipcvttps_epu8(__A); -} - -__m256i test_mm256_mask_ipcvttps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvttps_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs256 - return _mm256_mask_ipcvttps_epu8(__S, __A, __B); -} - -__m256i test_mm256_maskz_ipcvttps_epu8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvttps_epu8( - // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs256 - return _mm256_maskz_ipcvttps_epu8(__A, __B); + return _mm_maskz_ipcvtts_ps_epu8(__A, __B); } -__m256i test_mm256_ipcvtt_roundps_epu8(__m256 __A) { - // CHECK-LABEL: @test_mm256_ipcvtt_roundps_epu8( +__m256i test_mm256_ipcvtts_ps_epu8(__m256 __A) { + // CHECK-LABEL: @test_mm256_ipcvtts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs256 - return _mm256_ipcvtt_roundps_epu8(__A, _MM_FROUND_NO_EXC); + return _mm256_ipcvtts_ps_epu8(__A); } -__m256i test_mm256_mask_ipcvtt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_mask_ipcvtt_roundps_epu8( +__m256i test_mm256_mask_ipcvtts_ps_epu8(__m256i __S, __mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_ipcvtts_ps_epu8( // CHECK: @llvm.x86.avx10.mask.vcvttps2iubs256 - return _mm256_mask_ipcvtt_roundps_epu8(__S, __A, __B, _MM_FROUND_NO_EXC); + return _mm256_mask_ipcvtts_ps_epu8(__S, __A, __B); } -__m256i test_mm256_maskz_ipcvtt_roundps_epu8(__mmask8 __A, __m256 __B) { - // CHECK-LABEL: @test_mm256_maskz_ipcvtt_roundps_epu8( +__m256i test_mm256_maskz_ipcvtts_ps_epu8(__mmask8 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_ipcvtts_ps_epu8( // 
CHECK: @llvm.x86.avx10.mask.vcvttps2iubs256 - return _mm256_maskz_ipcvtt_roundps_epu8(__A, __B, _MM_FROUND_NO_EXC); + return _mm256_maskz_ipcvtts_ps_epu8(__A, __B); } diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c deleted file mode 100644 index f32dfba60132d..0000000000000 --- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c +++ /dev/null @@ -1,57 +0,0 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-unknown-unknown -target-feature +avx10.2-256 -Wall -Werror -verify - -unsigned long long test_mm_cvttssd(unsigned long long __A) { - return _mm_cvttssd(__A); // expected-error {{call to undeclared function '_mm_cvttssd'}} -} - -unsigned long long test_mm_cvttsss(unsigned long long __A) { - return _mm_cvttsss(__A); // expected-error {{call to undeclared function '_mm_cvttsss'}} -} - -#include -#include - -__m128i test_mm256_cvtts_roundpd_epi32(__m256d A) { - return _mm256_cvtts_roundpd_epi32(A, 22); // expected-error {{invalid rounding argument}} -} -__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W, __mmask8 U, __m256d A) { - return _mm256_mask_cvtts_roundpd_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}} -} - -__m128i test_mm256_maskz_cvtts_roundpd_epi32(__mmask8 U, __m256d A) { - return _mm256_maskz_cvtts_roundpd_epi32(U, A, 22); // expected-error {{invalid rounding argument}} -} - -__m128i test_mm256_cvtts_roundpd_epu32(__m256d A) { - return _mm256_cvtts_roundpd_epu32(A, 22); // expected-error {{invalid rounding argument}} -} -__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W, __mmask8 U, __m256d A) { - return _mm256_mask_cvtts_roundpd_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}} -} - -__m128i test_mm256_maskz_cvtts_roundpd_epu32(__mmask8 U, __m256d A) { - return _mm256_maskz_cvtts_roundpd_epu32(U, A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i 
test_mm256_cvtts_roundps_epi32(__m256 A) { - return _mm256_cvtts_roundps_epi32(A, 22); // expected-error {{invalid rounding argument}} -} -__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W, __mmask8 U, __m256 A) { - return _mm256_mask_cvtts_roundps_epi32(W, U, A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_cvtts_roundps_epi32(__mmask8 U, __m256 A) { - return _mm256_maskz_cvtts_roundps_epi32(U, A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_cvtts_roundps_epu32(__m256 A) { - return _mm256_cvtts_roundps_epu32(A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_mask_cvtts_roundps_epu32(__m256i W, __mmask8 U, __m256 A) { - return _mm256_mask_cvtts_roundps_epu32(W, U, A, 22); // expected-error {{invalid rounding argument}} -} - -__m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A) { - return _mm256_maskz_cvtts_roundps_epu32(U, A, 22); // expected-error {{invalid rounding argument}} -} diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c index 00384731a51f7..fe6755cc05ae1 100644 --- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c +++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c @@ -5,258 +5,186 @@ // scalar -int test_mm_cvttssd_i32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttssd_i32 +int test_mm_cvtts_sd_i32(__m128d __A) { + // CHECK-LABEL: @test_mm_cvtts_sd_i32 // CHECK: @llvm.x86.avx10.vcvttsd2sis return _mm_cvtts_roundsd_i32(__A, _MM_FROUND_NO_EXC); } -int test_mm_cvttssd_si32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttssd_si32( +int test_mm_cvtts_sd_si32(__m128d __A) { + // CHECK-LABEL: @test_mm_cvtts_sd_si32( // CHECK: @llvm.x86.avx10.vcvttsd2sis(<2 x double> return _mm_cvtts_roundsd_si32(__A, _MM_FROUND_NO_EXC); } -unsigned test_mm_cvttssd_u32(__m128d __A) { - // CHECK-LABEL: @test_mm_cvttssd_u32( +unsigned test_mm_cvtts_sd_u32(__m128d __A) { + // 
CHECK-LABEL: @test_mm_cvtts_sd_u32( // CHECK: @llvm.x86.avx10.vcvttsd2usis(<2 x double> return _mm_cvtts_roundsd_u32(__A, _MM_FROUND_NO_EXC); } -int test_mm_cvttsss_i32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttsss_i32( +int test_mm_cvtts_ss_i32(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtts_ss_i32( // CHECK: @llvm.x86.avx10.vcvttss2sis(<4 x float> return _mm_cvtts_roundss_i32(__A, _MM_FROUND_NO_EXC); } -int test_mm_cvttsss_si32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttsss_si32( +int test_mm_cvtts_ss_si32(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtts_ss_si32( // CHECK: @llvm.x86.avx10.vcvttss2sis(<4 x float> return _mm_cvtts_roundss_si32(__A, _MM_FROUND_NO_EXC); } -unsigned test_mm_cvttsss_u32(__m128 __A) { - // CHECK-LABEL: @test_mm_cvttsss_u32( +unsigned test_mm_cvtts_ss_u32(__m128 __A) { + // CHECK-LABEL: @test_mm_cvtts_ss_u32( // CHECK: @llvm.x86.avx10.vcvttss2usis(<4 x float> return _mm_cvtts_roundss_u32(__A, _MM_FROUND_NO_EXC); } // vector // 128 bit -__m128i test_mm_cvttspd_epi64(__m128d A){ - // CHECK-LABEL: @test_mm_cvttspd_epi64 +__m128i test_mm_cvtts_pd_epi64(__m128d A){ + // CHECK-LABEL: @test_mm_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double> - return _mm_cvttspd_epi64(A); + return _mm_cvtts_pd_epi64(A); } -__m128i test_mm_mask_cvttspd_epi64(__m128i W, __mmask8 U, __m128d A){ - // CHECK-LABEL: @test_mm_mask_cvttspd_epi64 +__m128i test_mm_mask_cvtts_pd_epi64(__m128i W, __mmask8 U, __m128d A){ + // CHECK-LABEL: @test_mm_mask_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double> - return _mm_mask_cvttspd_epi64(W, U, A); + return _mm_mask_cvtts_pd_epi64(W, U, A); } -__m128i test_mm_maskz_cvttspd_epi64(__mmask8 U,__m128d A){ - // CHECK-LABEL: @test_mm_maskz_cvttspd_epi64 +__m128i test_mm_maskz_cvtts_pd_epi64(__mmask8 U,__m128d A){ + // CHECK-LABEL: @test_mm_maskz_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.128(<2 x double> - return _mm_maskz_cvttspd_epi64(U, A); + return 
_mm_maskz_cvtts_pd_epi64(U, A); } -__m128i test_mm_cvttspd_epu64(__m128d A){ - // CHECK-LABEL: @test_mm_cvttspd_epu64 +__m128i test_mm_cvtts_pd_epu64(__m128d A){ + // CHECK-LABEL: @test_mm_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double> - return _mm_cvttspd_epu64(A); + return _mm_cvtts_pd_epu64(A); } -__m128i test_mm_mask_cvttspd_epu64(__m128i W, __mmask8 U, __m128d A){ - // CHECK-LABEL: @test_mm_mask_cvttspd_epu64 +__m128i test_mm_mask_cvtts_pd_epu64(__m128i W, __mmask8 U, __m128d A){ + // CHECK-LABEL: @test_mm_mask_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double> - return _mm_mask_cvttspd_epu64(W, U, A); + return _mm_mask_cvtts_pd_epu64(W, U, A); } -__m128i test_mm_maskz_cvttspd_epu64(__mmask8 U,__m128d A){ - // CHECK-LABEL: @test_mm_maskz_cvttspd_epu64 +__m128i test_mm_maskz_cvtts_pd_epu64(__mmask8 U,__m128d A){ + // CHECK-LABEL: @test_mm_maskz_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.128(<2 x double> - return _mm_maskz_cvttspd_epu64(U, A); + return _mm_maskz_cvtts_pd_epu64(U, A); } // 256 bit -__m256i test_mm256_cvttspd_epi64(__m256d A){ -// CHECK-LABEL: @test_mm256_cvttspd_epi64 +__m256i test_mm256_cvtts_pd_epi64(__m256d A){ +// CHECK-LABEL: @test_mm256_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double> - return _mm256_cvttspd_epi64(A); + return _mm256_cvtts_pd_epi64(A); } -__m256i test_mm256_mask_cvttspd_epi64(__m256i W,__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_mask_cvttspd_epi64 +__m256i test_mm256_mask_cvtts_pd_epi64(__m256i W,__mmask8 U, __m256d A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double> - return _mm256_mask_cvttspd_epi64(W,U, A); + return _mm256_mask_cvtts_pd_epi64(W,U, A); } -__m256i test_mm256_maskz_cvttspd_epi64(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvttspd_epi64 +__m256i test_mm256_maskz_cvtts_pd_epi64(__mmask8 U, __m256d A){ +// CHECK-LABEL: 
@test_mm256_maskz_cvtts_pd_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double> - return _mm256_maskz_cvttspd_epi64(U, A); + return _mm256_maskz_cvtts_pd_epi64(U, A); } -__m256i test_mm256_cvtts_roundpd_epi64(__m256d A){ -// CHECK-LABEL: @test_mm256_cvtts_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double> - return _mm256_cvtts_roundpd_epi64(A,_MM_FROUND_NEARBYINT ); -} - -__m256i test_mm256_mask_cvtts_roundpd_epi64(__m256i W,__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double> - return _mm256_mask_cvtts_roundpd_epi64(W,U,A,_MM_FROUND_NEARBYINT ); -} - -__m256i test_mm256_maskz_cvtts_roundpd_epi64(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double> - return _mm256_maskz_cvtts_roundpd_epi64(U,A,_MM_FROUND_NEARBYINT ); -} - -__m256i test_mm256_cvttspd_epu64(__m256d A){ -// CHECK-LABEL: @test_mm256_cvttspd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double> - return _mm256_cvttspd_epu64(A); -} - -__m256i test_mm256_mask_cvttspd_epu64(__m256i W,__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_mask_cvttspd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double> - return _mm256_mask_cvttspd_epu64(W,U, A); -} - -__m256i test_mm256_maskz_cvttspd_epu64(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvttspd_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double> - return _mm256_maskz_cvttspd_epu64(U, A); -} - -__m256i test_mm256_cvtts_roundpd_epu64(__m256d A){ -// CHECK-LABEL: @test_mm256_cvtts_roundpd_epu64 +__m256i test_mm256_cvtts_pd_epu64(__m256d A){ +// CHECK-LABEL: @test_mm256_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double> - return _mm256_cvtts_roundpd_epu64(A,_MM_FROUND_NEARBYINT ); + return _mm256_cvtts_pd_epu64(A); } -__m256i 
test_mm256_mask_cvtts_roundpd_epu64(__m256i W,__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu64 +__m256i test_mm256_mask_cvtts_pd_epu64(__m256i W,__mmask8 U, __m256d A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double> - return _mm256_mask_cvtts_roundpd_epu64(W,U,A,_MM_FROUND_NEARBYINT ); + return _mm256_mask_cvtts_pd_epu64(W,U, A); } -__m256i test_mm256_maskz_cvtts_roundpd_epu64(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu64 +__m256i test_mm256_maskz_cvtts_pd_epu64(__mmask8 U, __m256d A){ +// CHECK-LABEL: @test_mm256_maskz_cvtts_pd_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double> - return _mm256_maskz_cvtts_roundpd_epu64(U,A,_MM_FROUND_NEARBYINT ); + return _mm256_maskz_cvtts_pd_epu64(U, A); } // 128 bit -__m128i test_mm_cvttsps_epi64(__m128 A){ - // CHECK-LABEL: @test_mm_cvttsps_epi64 +__m128i test_mm_cvtts_ps_epi64(__m128 A){ + // CHECK-LABEL: @test_mm_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float> - return _mm_cvttsps_epi64(A); + return _mm_cvtts_ps_epi64(A); } -__m128i test_mm_mask_cvttsps_epi64(__m128i W, __mmask8 U, __m128 A){ - // CHECK-LABEL: @test_mm_mask_cvttsps_epi64 +__m128i test_mm_mask_cvtts_ps_epi64(__m128i W, __mmask8 U, __m128 A){ + // CHECK-LABEL: @test_mm_mask_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float> - return _mm_mask_cvttsps_epi64(W, U, A); + return _mm_mask_cvtts_ps_epi64(W, U, A); } -__m128i test_mm_maskz_cvttsps_epi64(__mmask8 U,__m128 A){ - // CHECK-LABEL: @test_mm_maskz_cvttsps_epi64 +__m128i test_mm_maskz_cvtts_ps_epi64(__mmask8 U,__m128 A){ + // CHECK-LABEL: @test_mm_maskz_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.128(<4 x float> - return _mm_maskz_cvttsps_epi64(U, A); + return _mm_maskz_cvtts_ps_epi64(U, A); } -__m128i test_mm_cvttsps_epu64(__m128 A){ - // CHECK-LABEL: @test_mm_cvttsps_epu64 +__m128i 
test_mm_cvtts_ps_epu64(__m128 A){ + // CHECK-LABEL: @test_mm_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float> - return _mm_cvttsps_epu64(A); + return _mm_cvtts_ps_epu64(A); } -__m128i test_mm_mask_cvttsps_epu64(__m128i W, __mmask8 U, __m128 A){ - // CHECK-LABEL: @test_mm_mask_cvttsps_epu64 +__m128i test_mm_mask_cvtts_ps_epu64(__m128i W, __mmask8 U, __m128 A){ + // CHECK-LABEL: @test_mm_mask_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float> - return _mm_mask_cvttsps_epu64(W, U, A); + return _mm_mask_cvtts_ps_epu64(W, U, A); } -__m128i test_mm_maskz_cvttsps_epu64(__mmask8 U,__m128 A){ - // CHECK-LABEL: @test_mm_maskz_cvttsps_epu64 +__m128i test_mm_maskz_cvtts_ps_epu64(__mmask8 U,__m128 A){ + // CHECK-LABEL: @test_mm_maskz_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.128(<4 x float> - return _mm_maskz_cvttsps_epu64(U, A); + return _mm_maskz_cvtts_ps_epu64(U, A); } -__m256i test_mm256_cvttsps_epi64(__m128 A){ -// CHECK-LABEL: @test_mm256_cvttsps_epi64 +__m256i test_mm256_cvtts_ps_epi64(__m128 A){ +// CHECK-LABEL: @test_mm256_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float> - return _mm256_cvttsps_epi64(A); + return _mm256_cvtts_ps_epi64(A); } -__m256i test_mm256_mask_cvttsps_epi64(__m256i W,__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_mask_cvttsps_epi64 +__m256i test_mm256_mask_cvtts_ps_epi64(__m256i W,__mmask8 U, __m128 A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float> - return _mm256_mask_cvttsps_epi64(W,U, A); + return _mm256_mask_cvtts_ps_epi64(W,U, A); } -__m256i test_mm256_maskz_cvttsps_epi64(__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_maskz_cvttsps_epi64 +__m256i test_mm256_maskz_cvtts_ps_epi64(__mmask8 U, __m128 A){ +// CHECK-LABEL: @test_mm256_maskz_cvtts_ps_epi64 // CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float> - return _mm256_maskz_cvttsps_epi64(U, A); -} - -__m256i 
test_mm256_cvtts_roundps_epi64(__m128 A){ -// CHECK-LABEL: @test_mm256_cvtts_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float> - return _mm256_cvtts_roundps_epi64(A, _MM_FROUND_NEARBYINT ); -} - -__m256i test_mm256_mask_cvtts_roundps_epi64(__m256i W,__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float> - return _mm256_mask_cvtts_roundps_epi64(W,U,A,_MM_FROUND_NEARBYINT ); -} - -__m256i test_mm256_maskz_cvtts_roundps_epi64(__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float> - return _mm256_maskz_cvtts_roundps_epi64(U,A,_MM_FROUND_NEARBYINT ); -} - -__m256i test_mm256_cvttsps_epu64(__m128 A){ -// CHECK-LABEL: @test_mm256_cvttsps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float> - return _mm256_cvttsps_epu64(A); -} - -__m256i test_mm256_mask_cvttsps_epu64(__m256i W,__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_mask_cvttsps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float> - return _mm256_mask_cvttsps_epu64(W,U, A); -} - -__m256i test_mm256_maskz_cvttsps_epu64(__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_maskz_cvttsps_epu64 -// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float> - return _mm256_maskz_cvttsps_epu64(U, A); + return _mm256_maskz_cvtts_ps_epi64(U, A); } -__m256i test_mm256_cvtts_roundps_epu64(__m128 A){ -// CHECK-LABEL: @test_mm256_cvtts_roundps_epu64 +__m256i test_mm256_cvtts_ps_epu64(__m128 A){ +// CHECK-LABEL: @test_mm256_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float> - return _mm256_cvtts_roundps_epu64(A, _MM_FROUND_NEARBYINT ); + return _mm256_cvtts_ps_epu64(A); } -__m256i test_mm256_mask_cvtts_roundps_epu64(__m256i W,__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu64 +__m256i test_mm256_mask_cvtts_ps_epu64(__m256i 
W,__mmask8 U, __m128 A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float> - return _mm256_mask_cvtts_roundps_epu64(W,U,A,_MM_FROUND_NEARBYINT ); + return _mm256_mask_cvtts_ps_epu64(W,U, A); } -__m256i test_mm256_maskz_cvtts_roundps_epu64(__mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu64 +__m256i test_mm256_maskz_cvtts_ps_epu64(__mmask8 U, __m128 A){ +// CHECK-LABEL: @test_mm256_maskz_cvtts_ps_epu64 // CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float> - return _mm256_maskz_cvtts_roundps_epu64(U,A,_MM_FROUND_NEARBYINT ); + return _mm256_maskz_cvtts_ps_epu64(U, A); } diff --git a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c index bb90f6a086fa2..b91af7073a55a 100644 --- a/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c +++ b/clang/test/CodeGen/X86/avx10_2satcvtds-builtins.c @@ -1,225 +1,150 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86 -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64 -target-feature +avx10.2-256 -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK #include #include -__m128i test_mm_cvttspd_epi32(__m128d A){ -// CHECK-LABEL: @test_mm_cvttspd_epi32 +__m128i test_mm_cvtts_pd_epi32(__m128d A){ +// CHECK-LABEL: @test_mm_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double> - return _mm_cvttspd_epi32(A); + return _mm_cvtts_pd_epi32(A); } -__m128i test_mm_mask_cvttspd_epi32(__m128i W, 
__mmask8 U, __m128d A){ -// CHECK-LABEL: @test_mm_mask_cvttspd_epi32 +__m128i test_mm_mask_cvtts_pd_epi32(__m128i W, __mmask8 U, __m128d A){ +// CHECK-LABEL: @test_mm_mask_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double> - return _mm_mask_cvttspd_epi32(W,U,A); + return _mm_mask_cvtts_pd_epi32(W,U,A); } -__m128i test_mm_maskz_cvttspd_epi32( __mmask8 U, __m128d A){ -// CHECK-LABEL: @test_mm_maskz_cvttspd_epi32( +__m128i test_mm_maskz_cvtts_pd_epi32( __mmask8 U, __m128d A){ +// CHECK-LABEL: @test_mm_maskz_cvtts_pd_epi32( // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.128(<2 x double> - return _mm_maskz_cvttspd_epi32(U,A); + return _mm_maskz_cvtts_pd_epi32(U,A); } -__m128i test_mm256_cvttspd_epi32(__m256d A){ -// CHECK-LABEL: @test_mm256_cvttspd_epi32 +__m128i test_mm256_cvtts_pd_epi32(__m256d A){ +// CHECK-LABEL: @test_mm256_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double> - return _mm256_cvttspd_epi32(A); + return _mm256_cvtts_pd_epi32(A); } -__m128i test_mm256_mask_cvttspd_epi32(__m128i W,__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_mask_cvttspd_epi32 +__m128i test_mm256_mask_cvtts_pd_epi32(__m128i W,__mmask8 U, __m256d A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double> - return _mm256_mask_cvttspd_epi32(W,U,A); + return _mm256_mask_cvtts_pd_epi32(W,U,A); } -__m128i test_mm256_maskz_cvttspd_epi32(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvttspd_epi32 +__m128i test_mm256_maskz_cvtts_pd_epi32(__mmask8 U, __m256d A){ +// CHECK-LABEL: @test_mm256_maskz_cvtts_pd_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double> - return _mm256_maskz_cvttspd_epi32(U,A); + return _mm256_maskz_cvtts_pd_epi32(U,A); } -__m128i test_mm256_cvtts_roundpd_epi32(__m256d A){ -// CHECK-LABEL: @test_mm256_cvtts_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double> - return _mm256_cvtts_roundpd_epi32(A, 
_MM_FROUND_NEARBYINT); -} - -__m128i test_mm256_mask_cvtts_roundpd_epi32(__m128i W,__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double> - return _mm256_mask_cvtts_roundpd_epi32(W,U,A,_MM_FROUND_NEARBYINT); -} - -__m128i test_mm256_maskz_cvtts_roundpd_epi32(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2dqs.round.256(<4 x double> - return _mm256_maskz_cvtts_roundpd_epi32(U,A,_MM_FROUND_NEARBYINT); -} - -__m128i test_mm_cvttspd_epu32(__m128d A){ -// CHECK-LABEL: @test_mm_cvttspd_epu32 +__m128i test_mm_cvtts_pd_epu32(__m128d A){ +// CHECK-LABEL: @test_mm_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double> - return _mm_cvttspd_epu32(A); + return _mm_cvtts_pd_epu32(A); } -__m128i test_mm_mask_cvttspd_epu32(__m128i W, __mmask8 U, __m128d A){ -// CHECK-LABEL: @test_mm_mask_cvttspd_epu32 +__m128i test_mm_mask_cvtts_pd_epu32(__m128i W, __mmask8 U, __m128d A){ +// CHECK-LABEL: @test_mm_mask_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double> - return _mm_mask_cvttspd_epu32(W,U,A); + return _mm_mask_cvtts_pd_epu32(W,U,A); } -__m128i test_mm_maskz_cvttspd_epu32( __mmask8 U, __m128d A){ -// CHECK-LABEL: @test_mm_maskz_cvttspd_epu32 +__m128i test_mm_maskz_cvtts_pd_epu32( __mmask8 U, __m128d A){ +// CHECK-LABEL: @test_mm_maskz_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.128(<2 x double> - return _mm_maskz_cvttspd_epu32(U,A); + return _mm_maskz_cvtts_pd_epu32(U,A); } -__m128i test_mm256_cvttspd_epu32(__m256d A){ -// CHECK-LABEL: @test_mm256_cvttspd_epu32 +__m128i test_mm256_cvtts_pd_epu32(__m256d A){ +// CHECK-LABEL: @test_mm256_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double> - return _mm256_cvttspd_epu32(A); + return _mm256_cvtts_pd_epu32(A); } -__m128i test_mm256_mask_cvttspd_epu32(__m128i W,__mmask8 U, __m256d A){ -// 
CHECK-LABEL: @test_mm256_mask_cvttspd_epu32 +__m128i test_mm256_mask_cvtts_pd_epu32(__m128i W,__mmask8 U, __m256d A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double> - return _mm256_mask_cvttspd_epu32(W,U,A); + return _mm256_mask_cvtts_pd_epu32(W,U,A); } -__m128i test_mm256_maskz_cvttspd_epu32(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvttspd_epu32 +__m128i test_mm256_maskz_cvtts_pd_epu32(__mmask8 U, __m256d A){ +// CHECK-LABEL: @test_mm256_maskz_cvtts_pd_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double> - return _mm256_maskz_cvttspd_epu32(U,A); + return _mm256_maskz_cvtts_pd_epu32(U,A); } -__m128i test_mm256_cvtts_roundpd_epu32(__m256d A){ -// CHECK-LABEL: @test_mm256_cvtts_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double> - return _mm256_cvtts_roundpd_epu32(A, _MM_FROUND_NEARBYINT); -} - -__m128i test_mm256_mask_cvtts_roundpd_epu32(__m128i W,__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double> - return _mm256_mask_cvtts_roundpd_epu32(W,U,A,_MM_FROUND_NEARBYINT); -} - -__m128i test_mm256_maskz_cvtts_roundpd_epu32(__mmask8 U, __m256d A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttpd2udqs.round.256(<4 x double> - return _mm256_maskz_cvtts_roundpd_epu32(U,A,_MM_FROUND_NEARBYINT); -} - -__m128i test_mm_cvttsps_epi32(__m128 A){ -// CHECK-LABEL: @test_mm_cvttsps_epi32 +__m128i test_mm_cvtts_ps_epi32(__m128 A){ +// CHECK-LABEL: @test_mm_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float> - return _mm_cvttsps_epi32(A); + return _mm_cvtts_ps_epi32(A); } -__m128i test_mm_mask_cvttsps_epi32(__m128i W, __mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm_mask_cvttsps_epi32 +__m128i test_mm_mask_cvtts_ps_epi32(__m128i W, __mmask8 U, __m128 A){ +// CHECK-LABEL: 
@test_mm_mask_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float> - return _mm_mask_cvttsps_epi32(W,U,A); + return _mm_mask_cvtts_ps_epi32(W,U,A); } -__m128i test_mm_maskz_cvttsps_epi32( __mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm_maskz_cvttsps_epi32 +__m128i test_mm_maskz_cvtts_ps_epi32( __mmask8 U, __m128 A){ +// CHECK-LABEL: @test_mm_maskz_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.128(<4 x float> - return _mm_maskz_cvttsps_epi32(U,A); + return _mm_maskz_cvtts_ps_epi32(U,A); } -__m256i test_mm256_cvttsps_epi32(__m256 A){ -// CHECK-LABEL: @test_mm256_cvttsps_epi32 +__m256i test_mm256_cvtts_ps_epi32(__m256 A){ +// CHECK-LABEL: @test_mm256_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float> - return _mm256_cvttsps_epi32(A); + return _mm256_cvtts_ps_epi32(A); } -__m256i test_mm256_mask_cvttsps_epi32(__m256i W,__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_mask_cvttsps_epi32 +__m256i test_mm256_mask_cvtts_ps_epi32(__m256i W,__mmask8 U, __m256 A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float> - return _mm256_mask_cvttsps_epi32(W,U,A); + return _mm256_mask_cvtts_ps_epi32(W,U,A); } -__m256i test_mm256_maskz_cvttsps_epi32(__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_maskz_cvttsps_epi32 +__m256i test_mm256_maskz_cvtts_ps_epi32(__mmask8 U, __m256 A){ +// CHECK-LABEL: @test_mm256_maskz_cvtts_ps_epi32 // CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float> - return _mm256_maskz_cvttsps_epi32(U,A); + return _mm256_maskz_cvtts_ps_epi32(U,A); } -__m256i test_mm256_cvtts_roundps_epi32(__m256 A){ -// CHECK-LABEL: @test_mm256_cvtts_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float> - return _mm256_cvtts_roundps_epi32(A, _MM_FROUND_NEARBYINT); -} - -__m256i test_mm256_mask_cvtts_roundps_epi32(__m256i W,__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi32 -// CHECK: 
@llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float> - return _mm256_mask_cvtts_roundps_epi32(W,U,A,_MM_FROUND_NEARBYINT); -} - -__m256i test_mm256_maskz_cvtts_roundps_epi32(__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2dqs.round.256(<8 x float> - return _mm256_maskz_cvtts_roundps_epi32(U,A,_MM_FROUND_NEARBYINT); -} - -__m128i test_mm_cvttsps_epu32(__m128 A){ -// CHECK-LABEL: @test_mm_cvttsps_epu32 +__m128i test_mm_cvtts_ps_epu32(__m128 A){ +// CHECK-LABEL: @test_mm_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float> - return _mm_cvttsps_epu32(A); + return _mm_cvtts_ps_epu32(A); } -__m128i test_mm_mask_cvttsps_epu32(__m128i W, __mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm_mask_cvttsps_epu32 +__m128i test_mm_mask_cvtts_ps_epu32(__m128i W, __mmask8 U, __m128 A){ +// CHECK-LABEL: @test_mm_mask_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float> - return _mm_mask_cvttsps_epu32(W,U,A); + return _mm_mask_cvtts_ps_epu32(W,U,A); } -__m128i test_mm_maskz_cvttsps_epu32( __mmask8 U, __m128 A){ -// CHECK-LABEL: @test_mm_maskz_cvttsps_epu32 +__m128i test_mm_maskz_cvtts_ps_epu32( __mmask8 U, __m128 A){ +// CHECK-LABEL: @test_mm_maskz_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.128(<4 x float> - return _mm_maskz_cvttsps_epu32(U,A); -} - -__m256i test_mm256_cvttsps_epu32(__m256 A){ -// CHECK-LABEL: @test_mm256_cvttsps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float> - return _mm256_cvttsps_epu32(A); -} - -__m256i test_mm256_mask_cvttsps_epu32(__m256i W,__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_mask_cvttsps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float> - return _mm256_mask_cvttsps_epu32(W,U,A); + return _mm_maskz_cvtts_ps_epu32(U,A); } -__m256i test_mm256_maskz_cvttsps_epu32(__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_maskz_cvttsps_epu32 +__m256i test_mm256_cvtts_ps_epu32(__m256 
A){ +// CHECK-LABEL: @test_mm256_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float> - return _mm256_maskz_cvttsps_epu32(U,A); + return _mm256_cvtts_ps_epu32(A); } -__m256i test_mm256_cvtts_roundps_epu32(__m256 A){ -// CHECK-LABEL: @test_mm256_cvtts_roundps_epu32 +__m256i test_mm256_mask_cvtts_ps_epu32(__m256i W,__mmask8 U, __m256 A){ +// CHECK-LABEL: @test_mm256_mask_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float> - return _mm256_cvtts_roundps_epu32(A, _MM_FROUND_NEARBYINT); + return _mm256_mask_cvtts_ps_epu32(W,U,A); } -__m256i test_mm256_mask_cvtts_roundps_epu32(__m256i W,__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu32 +__m256i test_mm256_maskz_cvtts_ps_epu32(__mmask8 U, __m256 A){ +// CHECK-LABEL: @test_mm256_maskz_cvtts_ps_epu32 // CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float> - return _mm256_mask_cvtts_roundps_epu32(W,U,A,_MM_FROUND_NEARBYINT); + return _mm256_maskz_cvtts_ps_epu32(U,A); } - -__m256i test_mm256_maskz_cvtts_roundps_epu32(__mmask8 U, __m256 A){ -// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu32 -// CHECK: @llvm.x86.avx10.mask.vcvttps2udqs.round.256(<8 x float> - return _mm256_maskz_cvtts_roundps_epu32(U,A,_MM_FROUND_NEARBYINT); -} - -// X64: {{.*}} -// X86: {{.*}} diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index c92aad633082f..e5067c1c3b075 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -56,7 +56,7 @@ void f_default2(void) { __attribute__((target("avx, sse4.2, arch= ivybridge"))) void f_avx_sse4_2_ivybridge_2(void) {} -// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes" +// CHECK: 
[[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" __attribute__((target("no-aes, arch=ivybridge"))) void f_no_aes_ivybridge(void) {} @@ -98,11 +98,11 @@ void f_x86_64_v3(void) {} __attribute__((target("arch=x86-64-v4"))) void f_x86_64_v4(void) {} -// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512" +// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512" __attribute__((target("avx10.1-256"))) void f_avx10_1_256(void) {} -// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave" +// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" 
"target-features"="+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" __attribute__((target("avx10.1-512"))) void f_avx10_1_512(void) {} @@ -112,4 +112,4 @@ void f_prefer_256_bit(void) {} // CHECK: [[f_no_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}-prefer-256-bit __attribute__((target("no-prefer-256-bit"))) -void f_no_prefer_256_bit(void) {} \ No newline at end of file +void f_no_prefer_256_bit(void) {} diff --git a/clang/test/CodeGenCXX/mangle-ms-matrix.cpp b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp new file mode 100644 index 0000000000000..b244aa6e33cfa --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 | FileCheck %s +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 -fexperimental-new-constant-interpreter | FileCheck %s + +typedef float __attribute__((matrix_type(4, 4))) m4x4f; +typedef float __attribute__((matrix_type(2, 2))) m2x2f; + +typedef int __attribute__((matrix_type(4, 4))) m4x4i; +typedef int __attribute__((matrix_type(2, 2))) m2x2i; + +void thow(int i) { + switch (i) { + case 0: throw m4x4f(); + // CHECK: ??_R0U?$__matrix@M$03$03@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@M$03$03@__clang@@@864 + // CHECK: _CTA1U?$__matrix@M$03$03@__clang@@ + // CHECK: _TI1U?$__matrix@M$03$03@__clang@@ + case 1: throw m2x2f(); + // CHECK: ??_R0U?$__matrix@M$01$01@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@M$01$01@__clang@@@816 + // CHECK: _CTA1U?$__matrix@M$01$01@__clang@@ + // CHECK: _TI1U?$__matrix@M$01$01@__clang@@ + case 2: throw m4x4i(); + // CHECK: 
??_R0U?$__matrix@H$03$03@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@H$03$03@__clang@@@864 + // CHECK: _CTA1U?$__matrix@H$03$03@__clang@@ + // CHECK: _TI1U?$__matrix@H$03$03@__clang@@ + case 3: throw m2x2i(); + // CHECK: ??_R0U?$__matrix@H$01$01@__clang@@@8 + // CHECK: _CT??_R0U?$__matrix@H$01$01@__clang@@@816 + // CHECK: _CTA1U?$__matrix@H$01$01@__clang@@ + // CHECK: _TI1U?$__matrix@H$01$01@__clang@@ + } +} + +void foo44f(m4x4f) {} +// CHECK: define dso_local void @"?foo44f@@YAXU?$__matrix@M$03$03@__clang@@@Z" + +m4x4f rfoo44f() { return m4x4f(); } +// CHECK: define dso_local noundef <16 x float> @"?rfoo44f@@YAU?$__matrix@M$03$03@__clang@@XZ" + +void foo22f(m2x2f) {} +// CHECK: define dso_local void @"?foo22f@@YAXU?$__matrix@M$01$01@__clang@@@Z" + +m2x2f rfoo22f() { return m2x2f(); } +// CHECK: define dso_local noundef <4 x float> @"?rfoo22f@@YAU?$__matrix@M$01$01@__clang@@XZ" + +void foo44i(m4x4i) {} +// CHECK: define dso_local void @"?foo44i@@YAXU?$__matrix@H$03$03@__clang@@@Z" + +m4x4i rfoo44i() { return m4x4i(); } +// CHECK: define dso_local noundef <16 x i32> @"?rfoo44i@@YAU?$__matrix@H$03$03@__clang@@XZ" + +void foo22i(m2x2i) {} +// CHECK: define dso_local void @"?foo22i@@YAXU?$__matrix@H$01$01@__clang@@@Z" + +m2x2i rfoo22i() { return m2x2i(); } +// CHECK: define dso_local noundef <4 x i32> @"?rfoo22i@@YAU?$__matrix@H$01$01@__clang@@XZ" \ No newline at end of file diff --git a/clang/test/CodeGenCoroutines/pr134409.cpp b/clang/test/CodeGenCoroutines/pr134409.cpp new file mode 100644 index 0000000000000..142962d44ede4 --- /dev/null +++ b/clang/test/CodeGenCoroutines/pr134409.cpp @@ -0,0 +1,43 @@ +// An end-to-end test to make sure coroutine passes are added for thinlto. 
+// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++23 -ffat-lto-objects -flto=thin -emit-llvm %s -O3 -o - \ +// RUN: | FileCheck %s + +#include "Inputs/coroutine.h" + +class BasicCoroutine { +public: + struct Promise { + BasicCoroutine get_return_object() { return BasicCoroutine {}; } + + void unhandled_exception() noexcept { } + + void return_void() noexcept { } + + std::suspend_never initial_suspend() noexcept { return {}; } + std::suspend_never final_suspend() noexcept { return {}; } + }; + using promise_type = Promise; +}; + +// COM: match the embedded module, so we don't match something in it by accident. +// CHECK: @llvm.embedded.object = {{.*}} +// CHECK: @llvm.compiler.used = {{.*}} + +BasicCoroutine coro() { +// CHECK: define {{.*}} void @_Z4corov() {{.*}} { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// CHECK-NEXT: } + co_return; +} + +int main() { +// CHECK: define {{.*}} i32 @main() {{.*}} { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @_Z4corov() +// CHECK-NEXT: ret i32 0 +// CHECK-NEXT: } + coro(); +} + diff --git a/clang/test/Driver/arm-mfpu.c b/clang/test/Driver/arm-mfpu.c index 640e1b35c84b8..a9bdcd598516a 100644 --- a/clang/test/Driver/arm-mfpu.c +++ b/clang/test/Driver/arm-mfpu.c @@ -356,10 +356,8 @@ // CHECK-HF-DAG: "-target-cpu" "arm1176jzf-s" // RUN: %clang -target armv7-apple-darwin -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// RUN: %clang -target armv7-windows -x assembler %s -### -c 2>&1 \ -// RUN: | FileCheck --check-prefix=ASM-NEON %s -// ASM-NEON: "-target-feature" "+neon" +// RUN: | FileCheck --check-prefix=ASM %s +// ASM-NOT: -target-feature // RUN: %clang -target armv8-linux-gnueabi -mfloat-abi=soft -mfpu=none %s -### -c 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SOFT-ABI-FP %s diff --git a/clang/test/Driver/hexagon-cpu-default.c b/clang/test/Driver/hexagon-cpu-default.c new file mode 100644 index 0000000000000..31fb839f21656 --- 
/dev/null +++ b/clang/test/Driver/hexagon-cpu-default.c @@ -0,0 +1,4 @@ +// CHECK: "-target-cpu" "hexagonv68" + +// RUN: %clang -c %s -### --target=hexagon-unknown-elf \ +// RUN: 2>&1 | FileCheck %s diff --git a/clang/test/Driver/hexagon-toolchain-elf.c b/clang/test/Driver/hexagon-toolchain-elf.c index be812dda40d57..de2ebfeeda26c 100644 --- a/clang/test/Driver/hexagon-toolchain-elf.c +++ b/clang/test/Driver/hexagon-toolchain-elf.c @@ -555,6 +555,7 @@ // RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ // RUN: -mcpu=hexagonv60 \ // RUN: -fuse-ld=lld %s 2>&1 | FileCheck -check-prefix=CHECK382 %s +// CHECK382: "--eh-frame-hdr // CHECK382-NOT: "-march= // CHECK382-NOT: "-mcpu= // ----------------------------------------------------------------------------- diff --git a/clang/test/Driver/hexagon-toolchain-linux.c b/clang/test/Driver/hexagon-toolchain-linux.c index 6f7f3b20f9141..e791353cca07f 100644 --- a/clang/test/Driver/hexagon-toolchain-linux.c +++ b/clang/test/Driver/hexagon-toolchain-linux.c @@ -127,6 +127,7 @@ // RUN: --target=hexagon-unknown-linux-musl %s -### 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK011 %s // CHECK011: InstalledDir: [[INSTALLED_DIR:.+]] +// CHECK011: "--eh-frame-hdr" // CHECK011: crt1.o // CHECK011-NOT: "-lunwind" // CHECK011-NOT: "-lgcc_eh" diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c index a80d0f5c79ec1..29e9682d58700 100644 --- a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c +++ b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c @@ -28,6 +28,8 @@ // CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing // CHECK-NEXT: FEAT_FP8 Enable FP8 instructions // CHECK-NEXT: FEAT_FP8DOT2 Enable FP8 2-way dot instructions +// CHECK-NEXT: FEAT_FP8DOT4 Enable FP8 4-way dot instructions +// CHECK-NEXT: FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions // CHECK-NEXT: 
FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement // CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int // CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions diff --git a/clang/test/Driver/systemz-march.c b/clang/test/Driver/systemz-march.c index 93a11c6c9c013..8922db9f2d5d6 100644 --- a/clang/test/Driver/systemz-march.c +++ b/clang/test/Driver/systemz-march.c @@ -15,6 +15,7 @@ // RUN: %clang -target s390x -### -S -emit-llvm -march=arch13 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH13 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=z16 %s 2>&1 | FileCheck --check-prefix=CHECK-Z16 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=arch14 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH14 %s +// RUN: %clang -target s390x -### -S -emit-llvm -march=z17 %s 2>&1 | FileCheck --check-prefix=CHECK-Z17 %s // RUN: %clang -target s390x -### -S -emit-llvm -march=arch15 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH15 %s // CHECK-Z9: error: unknown target CPU 'z9' @@ -32,6 +33,7 @@ // CHECK-ARCH13: "-target-cpu" "arch13" // CHECK-Z16: "-target-cpu" "z16" // CHECK-ARCH14: "-target-cpu" "arch14" +// CHECK-Z17: "-target-cpu" "z17" // CHECK-ARCH15: "-target-cpu" "arch15" int x; diff --git a/clang/test/Interpreter/out-of-process.cpp b/clang/test/Interpreter/out-of-process.cpp new file mode 100644 index 0000000000000..edc4c3fee0966 --- /dev/null +++ b/clang/test/Interpreter/out-of-process.cpp @@ -0,0 +1,88 @@ +// REQUIRES: host-supports-jit + +// RUN: cat %s | clang-repl -oop-executor -orc-runtime | FileCheck %s + +extern "C" int printf(const char *, ...); + +int intVar = 0; +double doubleVar = 3.14; +%undo +double doubleVar = 2.71; + +auto r1 = printf("intVar = %d\n", intVar); +// CHECK: intVar = 0 +auto r2 = printf("doubleVar = %.2f\n", doubleVar); +// CHECK: doubleVar = 2.71 + +// Test redefinition with inline 
and static functions. +int add(int a, int b, int c) { return a + b + c; } +%undo // Revert to the initial version of add +inline int add(int a, int b) { return a + b; } + +auto r3 = printf("add(1, 2) = %d\n", add(1, 2)); +// CHECK-NEXT: add(1, 2) = 3 + +// Test inline and lambda functions with variations. +inline int square(int x) { return x * x; } +auto lambdaSquare = [](int x) { return x * x; }; +auto lambdaMult = [](int a, int b) { return a * b; }; + +auto r4 = printf("square(4) = %d\n", square(4)); +// CHECK-NEXT: square(4) = 16 +auto lambda_r1 = printf("lambdaSquare(5) = %d\n", lambdaSquare(5)); +// CHECK-NEXT: lambdaSquare(5) = 25 +auto lambda_r2 = printf("lambdaMult(2, 3) = %d\n", lambdaMult(2, 3)); +// CHECK-NEXT: lambdaMult(2, 3) = 6 + +%undo // Undo previous lambda assignments +auto lambda_r3 = lambdaMult(3, 4); // Should fail or revert to the original lambda + +// Test weak and strong symbol linkage. +int __attribute__((weak)) weakFunc() { return 42; } +int strongFunc() { return 100; } +%undo // Revert the weak function + +auto r5 = printf("weakFunc() = %d\n", weakFunc()); +// CHECK: weakFunc() = 42 +auto r6 = printf("strongFunc() = %d\n", strongFunc()); +// CHECK-NEXT: strongFunc() = 100 + +// Weak variable linkage with different types. +int varA = 20; +static __typeof(varA) weakVarA __attribute__((__weakref__("varA"))); +char charVar = 'c'; +static __typeof(charVar) weakCharVar __attribute__((__weakref__("charVar"))); +auto r7 = printf("weakVarA = %d\n", weakVarA); +// CHECK: weakVarA = 20 +auto r8 = printf("weakCharVar = %c\n", weakCharVar); +// CHECK-NEXT: weakCharVar = c + +// Test complex lambdas with captures. +int captureVar = 5; +auto captureLambda = [](int x) { return x + captureVar; };` +int result1 = captureLambda(10); +%undo // Undo capture lambda + +auto r9 = printf("captureLambda(10) = %d\n", result1); +// CHECK: captureLambda(10) = 15 + +// Multiline statement test with arithmetic operations. 
+int sum = \ + 5 + \ + 10; +int prod = sum * 2; +auto r10 = printf("sum = %d, prod = %d\n", sum, prod); +// CHECK: sum = 15, prod = 30 + +// Test multiline functions and macro behavior. +#define MULTIPLY(a, b) ((a) * (b)) + +int complexFunc(int x) \ +{ \ + return MULTIPLY(x, 2) + x; \ +} + +auto r11 = printf("complexFunc(5) = %d\n", complexFunc(5)); +// CHECK: complexFunc(5) = 15 + +%quit diff --git a/clang/test/Misc/target-invalid-cpu-note/systemz.c b/clang/test/Misc/target-invalid-cpu-note/systemz.c index b70173f5feec2..021c280d53190 100644 --- a/clang/test/Misc/target-invalid-cpu-note/systemz.c +++ b/clang/test/Misc/target-invalid-cpu-note/systemz.c @@ -20,4 +20,5 @@ // CHECK-SAME: {{^}}, arch14 // CHECK-SAME: {{^}}, z16 // CHECK-SAME: {{^}}, arch15 +// CHECK-SAME: {{^}}, z17 // CHECK-SAME: {{$}} diff --git a/clang/test/Modules/MixedModulePrecompile.cpp b/clang/test/Modules/MixedModulePrecompile.cpp new file mode 100644 index 0000000000000..473817ef71de6 --- /dev/null +++ b/clang/test/Modules/MixedModulePrecompile.cpp @@ -0,0 +1,63 @@ +// Tests mixed usage of precompiled headers and modules. 
+// +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 -x c++-header -emit-pch %t/a.hpp \ +// RUN: -o %t/a.pch + +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/Part1.cppm \ +// RUN: -include-pch %t/a.pch -o %t/Part1.pcm +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/Part2.cppm \ +// RUN: -include-pch %t/a.pch -o %t/Part2.pcm +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/Part3.cppm \ +// RUN: -include-pch %t/a.pch -o %t/Part3.pcm +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %t/Part4.cppm \ +// RUN: -include-pch %t/a.pch -o %t/Part4.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-module-interface \ +// RUN: -fmodule-file=mod:part1=%t/Part1.pcm \ +// RUN: -fmodule-file=mod:part2=%t/Part2.pcm \ +// RUN: -fmodule-file=mod:part3=%t/Part3.pcm \ +// RUN: -fmodule-file=mod:part4=%t/Part4.pcm \ +// RUN: %t/Mod.cppm \ +// RUN: -include-pch %t/a.pch -o %t/Mod.pcm + +// RUN: %clang_cc1 -std=c++20 -emit-obj \ +// RUN: -main-file-name Mod.cppm \ +// RUN: -fmodule-file=mod:part1=%t/Part1.pcm \ +// RUN: -fmodule-file=mod:part2=%t/Part2.pcm \ +// RUN: -fmodule-file=mod:part3=%t/Part3.pcm \ +// RUN: -fmodule-file=mod:part4=%t/Part4.pcm \ +// RUN: -x pcm %t/Mod.pcm \ +// RUN: -include-pch %t/a.pch -o %t/Mod.o + + +//--- a.hpp +#pragma once + +class a { + virtual ~a(); + a() {} +}; + +//--- Part1.cppm +export module mod:part1; + +//--- Part2.cppm +export module mod:part2; + +//--- Part3.cppm +export module mod:part3; + +//--- Part4.cppm +export module mod:part4; + +//--- Mod.cppm +export module mod; +export import :part1; +export import :part2; +export import :part3; +export import :part4; + diff --git a/clang/test/Parser/recovery.cpp b/clang/test/Parser/recovery.cpp index 2fce67a52c6b6..261f5dc99bad4 100644 --- a/clang/test/Parser/recovery.cpp +++ b/clang/test/Parser/recovery.cpp @@ -222,3 +222,21 @@ void k() { func(1, ); // expected-error {{expected expression}} } } + +namespace GH136254 { + +void 
call() { + [a(42, )]() {} (); // expected-error {{expected expression}} + + int *b = new int(42, ); // expected-error {{expected expression}} + + struct S { + int c; + + S() : c(42, ) {} // expected-error {{expected expression}} + }; + + int d(42, ); // expected-error {{expected expression}} +} + +} diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index ecf9d7eb5c19c..27eb9a322d7c2 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -132,30 +132,6 @@ // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_ARCH_EXT_IDIV__ 1 // CHECK-V7VE-DEFAULT-ABI-SOFT: #define __ARM_FP 0xc -// RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7 -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-DARWIN-V7 %s -// CHECK-DARWIN-V7: #define __ARMEL__ 1 -// CHECK-DARWIN-V7: #define __ARM_ARCH 7 -// CHECK-DARWIN-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-DARWIN-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-DARWIN-V7: #define __ARM_FP 0xc -// CHECK-DARWIN-V7: #define __ARM_NEON 1 -// CHECK-DARWIN-V7: #define __ARM_NEON_FP 0x4 -// CHECK-DARWIN-V7: #define __ARM_NEON__ 1 - -// RUN: %clang -target armv7-windows -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-WINDOWS-V7 %s -// CHECK-WINDOWS-V7: #define __ARMEL__ 1 -// CHECK-WINDOWS-V7: #define __ARM_ARCH 7 -// CHECK-WINDOWS-V7: #define __ARM_ARCH_7A__ 1 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_CRC32 -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_NUMERIC_MAXMIN -// CHECK-WINDOWS-V7-NOT: __ARM_FEATURE_DIRECTED_ROUNDING -// CHECK-WINDOWS-V7: #define __ARM_FP 0xe -// CHECK-WINDOWS-V7: #define __ARM_NEON 1 -// CHECK-WINDOWS-V7: #define __ARM_NEON_FP 0x6 -// CHECK-WINDOWS-V7: #define __ARM_NEON__ 1 - // RUN: %clang -target x86_64-apple-macosx10.10 -arch armv7s -x c -E -dM %s -o - | FileCheck 
-match-full-lines --check-prefix=CHECK-V7S %s // CHECK-V7S: #define __ARMEL__ 1 // CHECK-V7S: #define __ARM_ARCH 7 @@ -164,9 +140,6 @@ // CHECK-V7S-NOT: __ARM_FEATURE_NUMERIC_MAXMIN // CHECK-V7S-NOT: __ARM_FEATURE_DIRECTED_ROUNDING // CHECK-V7S: #define __ARM_FP 0xe -// CHECK-V7S: #define __ARM_NEON 1 -// CHECK-V7S: #define __ARM_NEON_FP 0x6 -// CHECK-V7S: #define __ARM_NEON__ 1 // RUN: %clang -target arm-arm-none-eabi -march=armv7-m -mfloat-abi=soft -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-VFP-FP %s // RUN: %clang -target arm-arm-none-eabi -march=armv7-m -mfloat-abi=softfp -x c -E -dM %s | FileCheck -match-full-lines --check-prefix=CHECK-VFP-FP %s diff --git a/clang/test/Preprocessor/embed_constexpr.c b/clang/test/Preprocessor/embed_constexpr.c new file mode 100644 index 0000000000000..e444dfec158b5 --- /dev/null +++ b/clang/test/Preprocessor/embed_constexpr.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -std=c23 + +static constexpr unsigned char data[] = { +#embed "big_char.txt" +}; + +static constexpr char data1[] = { +#embed "big_char.txt" // expected-error {{constexpr initializer evaluates to 255 which is not exactly representable in type 'const char'}} +}; + +static constexpr int data2[] = { +#embed "big_char.txt" +}; + +static constexpr unsigned data3[] = { +#embed "big_char.txt" suffix(, -1) // expected-error {{constexpr initializer evaluates to -1 which is not exactly representable in type 'const unsigned int'}} +}; + +static constexpr int data4[] = { +#embed "big_char.txt" suffix(, -1) +}; diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index f267f1759cdb5..2d17891071aae 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4394,6 +4394,9 @@ // RUN: %clang -march=arch15 -E -dM %s -o - 2>&1 \ // RUN: -target s390x-unknown-linux \ // RUN: | FileCheck 
-match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH15 +// RUN: %clang -march=z17 -E -dM %s -o - 2>&1 \ +// RUN: -target s390x-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH15 // CHECK_SYSTEMZ_ARCH15: #define __ARCH__ 15 // CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1 // CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1 diff --git a/clang/test/Sema/GH126231.cpp b/clang/test/Sema/GH126231.cpp new file mode 100644 index 0000000000000..d10fc79c3b628 --- /dev/null +++ b/clang/test/Sema/GH126231.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-ignored-attributes -Wno-unused-value -verify %s +// expected-no-diagnostics +namespace std { +template +constexpr const T& as_const(T&) noexcept; + +// We need two declarations to see the error for some reason. +template void as_const(const T&&) noexcept = delete; +template void as_const(const T&&) noexcept; +} + +namespace GH126231 { + +void test() { + int a = 1; + std::as_const(a); +} +} diff --git a/clang/test/Sema/warn-cast-function-type-win.c b/clang/test/Sema/warn-cast-function-type-win.c new file mode 100644 index 0000000000000..4e7ba33b258d8 --- /dev/null +++ b/clang/test/Sema/warn-cast-function-type-win.c @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 %s -triple x86_64-windows -fsyntax-only -Wcast-function-type -Wno-cast-function-type-strict -verify=windows +// RUN: %clang_cc1 %s -triple x86_64-windows -fsyntax-only -Wcast-function-type -Wno-cast-function-type-strict -x c++ -verify=windows +// RUN: %clang_cc1 %s -triple x86_64-pc-linux -fsyntax-only -Wcast-function-type -Wno-cast-function-type-strict -verify=linux +// RUN: %clang_cc1 %s -triple x86_64-pc-linux -fsyntax-only -Wcast-function-type -Wno-cast-function-type-strict -x c++ -verify=linux,linux-cpp +// RUN: %clang_cc1 %s -triple x86_64-windows -fsyntax-only -Wcast-function-type -Wcast-function-type-strict -x c++ -verify=strict +// windows-no-diagnostics + +// On Windows targets, this 
is expected to compile fine, and on non-Windows +// targets, this should diagnose the mismatch. This is to allow for idiomatic +// use of GetProcAddress, similar to what we do for dlsym. On non-Windows +// targets, this should be diagnosed. +typedef int (*FARPROC1)(); +typedef unsigned long long (*FARPROC2)(); + +FARPROC1 GetProcAddress1(void); +FARPROC2 GetProcAddress2(void); + +typedef int (*test1_type)(int); +typedef float(*test2_type)(); + +void test(void) { + // This does not diagnose on Linux in C mode because FARPROC1 has a matching + // return type to test1_type, but FARPROC1 has no prototype and so checking + // is disabled for further compatibility issues. In C++ mode, all functions + // have a prototype and so the check happens. + test1_type t1 = (test1_type)GetProcAddress1(); + // linux-cpp-warning@-1 {{cast from 'FARPROC1' (aka 'int (*)()') to 'test1_type' (aka 'int (*)(int)') converts to incompatible function type}} + // strict-warning@-2 {{cast from 'FARPROC1' (aka 'int (*)()') to 'test1_type' (aka 'int (*)(int)') converts to incompatible function type}} + + // This case is diagnosed in both C and C++ modes on Linux because the return + // type of FARPROC2 does not match the return type of test2_type. 
+ test2_type t2 = (test2_type)GetProcAddress2(); + // linux-warning@-1 {{cast from 'FARPROC2' (aka 'unsigned long long (*)()') to 'test2_type' (aka 'float (*)()') converts to incompatible function type}} + // strict-warning@-2 {{cast from 'FARPROC2' (aka 'unsigned long long (*)()') to 'test2_type' (aka 'float (*)()') converts to incompatible function type}} +} + diff --git a/clang/test/SemaCXX/builtin-object-size-cxx14.cpp b/clang/test/SemaCXX/builtin-object-size-cxx14.cpp index b7c6f6be01f54..fdd3cb7af088f 100644 --- a/clang/test/SemaCXX/builtin-object-size-cxx14.cpp +++ b/clang/test/SemaCXX/builtin-object-size-cxx14.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx14 -std=c++14 %s // RUN: %clang_cc1 -fsyntax-only -verify -std=c++2a %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++2b %s + typedef __SIZE_TYPE__ size_t; @@ -119,3 +121,13 @@ constexpr int bos_new() { // cxx14-error {{constant expression}} void *p = new int; // cxx14-note {{until C++20}} return __builtin_object_size(p, 0); } + + +namespace GH129397 { + +struct incomplete; +void test(incomplete &ref) { + __builtin_object_size(&ref, 1); +} + +} diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp index 6c9a87267109c..87beeb4d3dc84 100644 --- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp +++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp @@ -179,3 +179,24 @@ namespace extern_reference_used_as_unknown { int y; constinit int& g = (x,y); // expected-warning {{left operand of comma operator has no effect}} } + +namespace GH139452 { +struct Dummy { + explicit operator bool() const noexcept { return true; } +}; + +struct Base { int error; }; +struct Derived : virtual Base { }; + +template +constexpr R get_value() { + const auto& derived_val = Derived{}; + if (derived_val.error != 0) + /* nothing */; + return R{}; +} + +int f() { + return !get_value(); // contextually convert the function call result to bool 
+} +} diff --git a/clang/test/SemaCXX/ctad.cpp b/clang/test/SemaCXX/ctad.cpp index 10806f107b4ee..00a861d0f567c 100644 --- a/clang/test/SemaCXX/ctad.cpp +++ b/clang/test/SemaCXX/ctad.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -fsyntax-only -verify -Wno-unused-value -std=c++20 %s -// expected-no-diagnostics namespace GH64347 { @@ -17,3 +16,134 @@ void k() { } } // namespace GH64347 + +namespace GH123591 { + + +template < typename... _Types > +struct variant { + template + variant(_Types...); +}; + +template +using AstNode = variant; + +AstNode tree(42, 43, 44); + +} + +namespace GH123591_2 { + +template +using enable_if_t = char; + +template < typename... Types > +struct variant { + template < enable_if_t> + variant(); +}; + +template +using AstNode = variant<>; +// expected-note@-1 {{couldn't infer template argument ''}} \ +// expected-note@-1 2{{implicit deduction guide declared as}} \ +// expected-note@-1 {{candidate function template not viable}} + + +AstNode tree; // expected-error {{no viable constructor or deduction guide}} + +} + +namespace GH127539 { + +template +struct A { + template + A(ArgTs...) {} +}; + +template +A(ArgTs...) -> A; + +template +using AA = A; + +AA a{}; + +} + +namespace GH129077 { + +using size_t = decltype(sizeof(0)); + +struct index_type +{ + size_t value = 0; + index_type() = default; + constexpr index_type(size_t i) noexcept : value(i) {} +}; + +template +struct extents +{ + constexpr extents(decltype(Extents)...) noexcept {} +}; + +template +extents(Extents...) -> extents<(requires { Extents::value; } ? Extents{} : ~0ull)...>; + +template +using index = extents; + +int main() +{ + extents i{0,0}; + auto j = extents<64,{}>({}, 42); + + index k{0,0}; + auto l = index<64,{}>({}, 42); + + return 0; +} + +} + +namespace GH129620 { + +template +struct A { + constexpr A(Ts...) 
{} +}; + +template +using Foo = A; + +template +using Bar = Foo; + +Bar a{0, 0}; + +} + +namespace GH129998 { + +struct converible_to_one { + constexpr operator int() const noexcept { return 1; } +}; + +template +struct class_template { + class_template() = default; + constexpr class_template(auto&&...) noexcept {} +}; + +template +class_template(Extents...) -> class_template<(true ? 0 : +Extents{})...>; + +template +using alias_template = class_template; + +alias_template var2{converible_to_one{}, 2}; + +} diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index 6f17ce7275456..7e392213710a4 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -1134,3 +1134,10 @@ struct S { static_assert((S{} << 11) == a); // expected-error@-1 {{use of undeclared identifier 'a'}} } + +namespace GH135522 { +struct S { + auto f(this auto) -> S; + bool g() { return f(); } // expected-error {{no viable conversion from returned value of type 'S' to function return type 'bool'}} +}; +} diff --git a/clang/test/SemaCXX/cxx2c-enum-compare.cpp b/clang/test/SemaCXX/cxx2c-enum-compare.cpp index f47278a60725e..96fbd368b1696 100644 --- a/clang/test/SemaCXX/cxx2c-enum-compare.cpp +++ b/clang/test/SemaCXX/cxx2c-enum-compare.cpp @@ -1,9 +1,10 @@ -// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify -triple %itanium_abi_triple +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both,expected +// RUN: %clang_cc1 %s -std=c++2c -fsyntax-only -verify=both -Wno-enum-enum-conversion enum E1 { e }; enum E2 { f }; void test() { - int b = e <= 3.7; // expected-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} + int b = e <= 3.7; // both-error {{invalid comparison of enumeration type 'E1' with floating-point type 'double'}} int k = f - e; // expected-error {{invalid arithmetic between different enumeration types ('E2' and 'E1')}} int x = 1 ? 
e : f; // expected-error {{invalid conditional expression between different enumeration types ('E1' and 'E2')}} } diff --git a/clang/test/SemaTemplate/concepts-lambda.cpp b/clang/test/SemaTemplate/concepts-lambda.cpp index dcb09c76d26b6..1f67c2511e096 100644 --- a/clang/test/SemaTemplate/concepts-lambda.cpp +++ b/clang/test/SemaTemplate/concepts-lambda.cpp @@ -325,3 +325,18 @@ template void f() { template void f(); } + +namespace GH133719 { + +template +constexpr auto f{[] (auto arg) { + return [a{arg}] { + [] () requires true {}(); + }; +}}; + +void foo() { + f(0); +} + +} diff --git a/clang/test/SemaTemplate/cwg2398.cpp b/clang/test/SemaTemplate/cwg2398.cpp index 8592be469bb50..33b288acce82a 100644 --- a/clang/test/SemaTemplate/cwg2398.cpp +++ b/clang/test/SemaTemplate/cwg2398.cpp @@ -650,6 +650,11 @@ namespace regression3 { template struct A>; // old-error@-1 {{different template}} } // namespace regression3 +namespace GH130362 { + template